Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1999-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   11/10/99    aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_TRANSLITERATION
     14 
     15 #include "transtst.h"
     16 #include "unicode/locid.h"
     17 #include "unicode/dtfmtsym.h"
     18 #include "unicode/normlzr.h"
     19 #include "unicode/translit.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/unifilt.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/usetiter.h"
     25 #include "unicode/uscript.h"
     26 #include "unicode/utf16.h"
     27 #include "cpdtrans.h"
     28 #include "nultrans.h"
     29 #include "rbt.h"
     30 #include "rbt_pars.h"
     31 #include "anytrans.h"
     32 #include "esctrn.h"
     33 #include "name2uni.h"
     34 #include "nortrans.h"
     35 #include "remtrans.h"
     36 #include "titletrn.h"
     37 #include "tolowtrn.h"
     38 #include "toupptrn.h"
     39 #include "unesctrn.h"
     40 #include "uni2name.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 #include <stdio.h>
     44 
     45 /***********************************************************************
     46 
     47                      HOW TO USE THIS TEST FILE
     48                                -or-
     49                   How I developed on two platforms
     50                 without losing (too much of) my mind
     51 
     52 
     53 1. Add new tests by copying/pasting/changing existing tests.  On Java,
     54    any public void method named Test...() taking no parameters becomes
     55    a test.  On C++, you need to modify the header and add a line to
     56    the runIndexedTest() dispatch method.
     57 
     58 2. Make liberal use of the expect() method; it is your friend.
     59 
     60 3. The tests in this file exactly match those in a sister file on the
     61    other side.  The two files are:
     62 
     63    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
     64    icu4c:  source/test/intltest/transtst.cpp
     65 
     66                   ==> THIS IS THE IMPORTANT PART <==
     67 
     68    When you add a test in this file, add it in TransliteratorTest.java
     69    too.  Give it the same name and put it in the same relative place.
     70    This makes maintenance a lot simpler for any poor soul who ends up
     71    trying to synchronize the tests between icu4j and icu4c.
     72 
     73 4. If you MUST enter a test that is NOT paralleled in the sister file,
     74    then add it in the special non-mirrored section.  These are
     75    labeled
     76 
     77      "icu4j ONLY"
     78 
     79    or
     80 
     81      "icu4c ONLY"
     82 
     83    Make sure you document the reason the test is here and not there.
     84 
     85 
     86 Thank you.
     87 The Management
     88 ***********************************************************************/
     89 
     90 // Define character constants thusly to be EBCDIC-friendly
     91 enum {
     92     LEFT_BRACE=((UChar)0x007B), /*{*/
     93     PIPE      =((UChar)0x007C), /*|*/
     94     ZERO      =((UChar)0x0030), /*0*/
     95     UPPER_A   =((UChar)0x0041)  /*A*/
     96 };
     97 
     98 TransliteratorTest::TransliteratorTest()
     99 :   DESERET_DEE((UChar32)0x10414),
    100     DESERET_dee((UChar32)0x1043C)
    101 {
    102 }
    103 
    104 TransliteratorTest::~TransliteratorTest() {}
    105 
    106 void
    107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
    108                                    const char* &name, char* /*par*/) {
    109     switch (index) {
    110         TESTCASE(0,TestInstantiation);
    111         TESTCASE(1,TestSimpleRules);
    112         TESTCASE(2,TestRuleBasedInverse);
    113         TESTCASE(3,TestKeyboard);
    114         TESTCASE(4,TestKeyboard2);
    115         TESTCASE(5,TestKeyboard3);
    116         TESTCASE(6,TestArabic);
    117         TESTCASE(7,TestCompoundKana);
    118         TESTCASE(8,TestCompoundHex);
    119         TESTCASE(9,TestFiltering);
    120         TESTCASE(10,TestInlineSet);
    121         TESTCASE(11,TestPatternQuoting);
    122         TESTCASE(12,TestJ277);
    123         TESTCASE(13,TestJ243);
    124         TESTCASE(14,TestJ329);
    125         TESTCASE(15,TestSegments);
    126         TESTCASE(16,TestCursorOffset);
    127         TESTCASE(17,TestArbitraryVariableValues);
    128         TESTCASE(18,TestPositionHandling);
    129         TESTCASE(19,TestHiraganaKatakana);
    130         TESTCASE(20,TestCopyJ476);
    131         TESTCASE(21,TestAnchors);
    132         TESTCASE(22,TestInterIndic);
    133         TESTCASE(23,TestFilterIDs);
    134         TESTCASE(24,TestCaseMap);
    135         TESTCASE(25,TestNameMap);
    136         TESTCASE(26,TestLiberalizedID);
    137         TESTCASE(27,TestCreateInstance);
    138         TESTCASE(28,TestNormalizationTransliterator);
    139         TESTCASE(29,TestCompoundRBT);
    140         TESTCASE(30,TestCompoundFilter);
    141         TESTCASE(31,TestRemove);
    142         TESTCASE(32,TestToRules);
    143         TESTCASE(33,TestContext);
    144         TESTCASE(34,TestSupplemental);
    145         TESTCASE(35,TestQuantifier);
    146         TESTCASE(36,TestSTV);
    147         TESTCASE(37,TestCompoundInverse);
    148         TESTCASE(38,TestNFDChainRBT);
    149         TESTCASE(39,TestNullInverse);
    150         TESTCASE(40,TestAliasInverseID);
    151         TESTCASE(41,TestCompoundInverseID);
    152         TESTCASE(42,TestUndefinedVariable);
    153         TESTCASE(43,TestEmptyContext);
    154         TESTCASE(44,TestCompoundFilterID);
    155         TESTCASE(45,TestPropertySet);
    156         TESTCASE(46,TestNewEngine);
    157         TESTCASE(47,TestQuantifiedSegment);
    158         TESTCASE(48,TestDevanagariLatinRT);
    159         TESTCASE(49,TestTeluguLatinRT);
    160         TESTCASE(50,TestCompoundLatinRT);
    161         TESTCASE(51,TestSanskritLatinRT);
    162         TESTCASE(52,TestLocaleInstantiation);
    163         TESTCASE(53,TestTitleAccents);
    164         TESTCASE(54,TestLocaleResource);
    165         TESTCASE(55,TestParseError);
    166         TESTCASE(56,TestOutputSet);
    167         TESTCASE(57,TestVariableRange);
    168         TESTCASE(58,TestInvalidPostContext);
    169         TESTCASE(59,TestIDForms);
    170         TESTCASE(60,TestToRulesMark);
    171         TESTCASE(61,TestEscape);
    172         TESTCASE(62,TestAnchorMasking);
    173         TESTCASE(63,TestDisplayName);
    174         TESTCASE(64,TestSpecialCases);
    175 #if !UCONFIG_NO_FILE_IO
    176         TESTCASE(65,TestIncrementalProgress);
    177 #endif
    178         TESTCASE(66,TestSurrogateCasing);
    179         TESTCASE(67,TestFunction);
    180         TESTCASE(68,TestInvalidBackRef);
    181         TESTCASE(69,TestMulticharStringSet);
    182         TESTCASE(70,TestUserFunction);
    183         TESTCASE(71,TestAnyX);
    184         TESTCASE(72,TestSourceTargetSet);
    185         TESTCASE(73,TestGurmukhiDevanagari);
    186         TESTCASE(74,TestPatternWhiteSpace);
    187         TESTCASE(75,TestAllCodepoints);
    188         TESTCASE(76,TestBoilerplate);
    189         TESTCASE(77,TestAlternateSyntax);
    190         TESTCASE(78,TestBeginEnd);
    191         TESTCASE(79,TestBeginEndToRules);
    192         TESTCASE(80,TestRegisterAlias);
    193         TESTCASE(81,TestRuleStripping);
    194         TESTCASE(82,TestHalfwidthFullwidth);
    195         TESTCASE(83,TestThai);
    196         TESTCASE(84,TestAny);
    197         default: name = ""; break;
    198     }
    199 }
    200 
    201 /**
    202  * Make sure every system transliterator can be instantiated.
    203  *
    204  * ALSO test that the result of toRules() for each rule is a valid
    205  * rule.  Do this here so we don't have to have another test that
    206  * instantiates everything as well.
    207  */
    208 void TransliteratorTest::TestInstantiation() {
    209     UErrorCode ec = U_ZERO_ERROR;
    210     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
    211     assertSuccess("getAvailableIDs()", ec);
    212     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
    213     int32_t n = Transliterator::countAvailableIDs();
    214     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
    215                avail->count(ec) == n);
    216     assertSuccess("count()", ec);
    217     UnicodeString name;
    218     for (int32_t i=0; i<n; ++i) {
    219         const UnicodeString& id = *avail->snext(ec);
    220         if (!assertSuccess("snext()", ec) ||
    221             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
    222             break;
    223         }
    224         UnicodeString id2 = Transliterator::getAvailableID(i);
    225         if (id.length() < 1) {
    226             errln(UnicodeString("FAIL: getAvailableID(") +
    227                   i + ") returned empty string");
    228             continue;
    229         }
    230         if (id != id2) {
    231             errln(UnicodeString("FAIL: getAvailableID(") +
    232                   i + ") != getAvailableIDs().snext()");
    233             continue;
    234         }
    235         UParseError parseError;
    236         UErrorCode status = U_ZERO_ERROR;
    237         Transliterator* t = Transliterator::createInstance(id,
    238                               UTRANS_FORWARD, parseError,status);
    239         name.truncate(0);
    240         Transliterator::getDisplayName(id, name);
    241         if (t == 0) {
    242 #if UCONFIG_NO_BREAK_ITERATION
    243             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
    244             if (id.compare((UnicodeString)"Thai-Latin") != 0)
    245 #endif
    246                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
    247                       /*", parse error " + parseError.code +*/
    248                       ", line " + parseError.line +
    249                       ", offset " + parseError.offset +
    250                       ", pre-context " + prettify(parseError.preContext, TRUE) +
    251                       ", post-context " +prettify(parseError.postContext,TRUE) +
    252                       ", Error: " + u_errorName(status));
    253                 // When createInstance fails, it deletes the failing
    254                 // entry from the available ID list.  We detect this
    255                 // here by looking for a change in countAvailableIDs.
    256             int32_t nn = Transliterator::countAvailableIDs();
    257             if (nn == (n - 1)) {
    258                 n = nn;
    259                 --i; // Compensate for deleted entry
    260             }
    261         } else {
    262             logln(UnicodeString("OK: ") + name + " (" + id + ")");
    263 
    264             // Now test toRules
    265             UnicodeString rules;
    266             t->toRules(rules, TRUE);
    267             Transliterator *u = Transliterator::createFromRules("x",
    268                                     rules, UTRANS_FORWARD, parseError,status);
    269             if (u == 0) {
    270                 errln(UnicodeString("FAIL: ") + id +
    271                       ".createFromRules() => bad rules" +
    272                       /*", parse error " + parseError.code +*/
    273                       ", line " + parseError.line +
    274                       ", offset " + parseError.offset +
    275                       ", context " + prettify(parseError.preContext, TRUE) +
    276                       ", rules: " + prettify(rules, TRUE));
    277             } else {
    278                 delete u;
    279             }
    280             delete t;
    281         }
    282     }
    283     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
    284     assertSuccess("snext()", ec);
    285     delete avail;
    286 
    287     // Now test the failure path
    288     UParseError parseError;
    289     UErrorCode status = U_ZERO_ERROR;
    290     UnicodeString id("<Not a valid Transliterator ID>");
    291     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
    292     if (t != 0) {
    293         errln("FAIL: " + id + " returned a transliterator");
    294         delete t;
    295     } else {
    296         logln("OK: Bogus ID handled properly");
    297     }
    298 }
    299 
    300 void TransliteratorTest::TestSimpleRules(void) {
    301     /* Example: rules 1. ab>x|y
    302      *                2. yc>z
    303      *
    304      * []|eabcd  start - no match, copy e to tranlated buffer
    305      * [e]|abcd  match rule 1 - copy output & adjust cursor
    306      * [ex|y]cd  match rule 2 - copy output & adjust cursor
    307      * [exz]|d   no match, copy d to transliterated buffer
    308      * [exzd]|   done
    309      */
    310     expect(UnicodeString("ab>x|y;", "") +
    311            "yc>z",
    312            "eabcd", "exzd");
    313 
    314     /* Another set of rules:
    315      *    1. ab>x|yzacw
    316      *    2. za>q
    317      *    3. qc>r
    318      *    4. cw>n
    319      *
    320      * []|ab       Rule 1
    321      * [x|yzacw]   No match
    322      * [xy|zacw]   Rule 2
    323      * [xyq|cw]    Rule 4
    324      * [xyqn]|     Done
    325      */
    326     expect(UnicodeString("ab>x|yzacw;") +
    327            "za>q;" +
    328            "qc>r;" +
    329            "cw>n",
    330            "ab", "xyqn");
    331 
    332     /* Test categories
    333      */
    334     UErrorCode status = U_ZERO_ERROR;
    335     UParseError parseError;
    336     Transliterator *t = Transliterator::createFromRules(
    337         "<ID>",
    338         UnicodeString("$dummy=").append((UChar)0xE100) +
    339         UnicodeString(";"
    340                       "$vowel=[aeiouAEIOU];"
    341                       "$lu=[:Lu:];"
    342                       "$vowel } $lu > '!';"
    343                       "$vowel > '&';"
    344                       "'!' { $lu > '^';"
    345                       "$lu > '*';"
    346                       "a > ERROR", ""),
    347         UTRANS_FORWARD, parseError,
    348         status);
    349     if (U_FAILURE(status)) {
    350         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
    351         return;
    352     }
    353     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
    354     delete t;
    355 }
    356 
    357 /**
    358  * Test inline set syntax and set variable syntax.
    359  */
    360 void TransliteratorTest::TestInlineSet(void) {
    361     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
    362     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
    363 
    364     expect(UnicodeString(
    365            "$digit = [0-9];"
    366            "$alpha = [a-zA-Z];"
    367            "$alphanumeric = [$digit $alpha];" // ***
    368            "$special = [^$alphanumeric];"     // ***
    369            "$alphanumeric > '-';"
    370            "$special > '*';", ""),
    371 
    372            "thx-1138", "---*----");
    373 }
    374 
    375 /**
    376  * Create some inverses and confirm that they work.  We have to be
    377  * careful how we do this, since the inverses will not be true
    378  * inverses -- we can't throw any random string at the composition
    379  * of the transliterators and expect the identity function.  F x
    380  * F' != I.  However, if we are careful about the input, we will
    381  * get the expected results.
    382  */
    383 void TransliteratorTest::TestRuleBasedInverse(void) {
    384     UnicodeString RULES =
    385         UnicodeString("abc>zyx;") +
    386         "ab>yz;" +
    387         "bc>zx;" +
    388         "ca>xy;" +
    389         "a>x;" +
    390         "b>y;" +
    391         "c>z;" +
    392 
    393         "abc<zyx;" +
    394         "ab<yz;" +
    395         "bc<zx;" +
    396         "ca<xy;" +
    397         "a<x;" +
    398         "b<y;" +
    399         "c<z;" +
    400 
    401         "";
    402 
    403     const char* DATA[] = {
    404         // Careful here -- random strings will not work.  If we keep
    405         // the left side to the domain and the right side to the range
    406         // we will be okay though (left, abc; right xyz).
    407         "a", "x",
    408         "abcacab", "zyxxxyy",
    409         "caccb", "xyzzy",
    410     };
    411 
    412     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    413 
    414     UErrorCode status = U_ZERO_ERROR;
    415     UParseError parseError;
    416     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
    417                                 UTRANS_FORWARD, parseError, status);
    418     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
    419                                 UTRANS_REVERSE, parseError, status);
    420     if (U_FAILURE(status)) {
    421         errln("FAIL: RBT constructor failed");
    422         return;
    423     }
    424     for (int32_t i=0; i<DATA_length; i+=2) {
    425         expect(*fwd, DATA[i], DATA[i+1]);
    426         expect(*rev, DATA[i+1], DATA[i]);
    427     }
    428     delete fwd;
    429     delete rev;
    430 }
    431 
    432 /**
    433  * Basic test of keyboard.
    434  */
    435 void TransliteratorTest::TestKeyboard(void) {
    436     UParseError parseError;
    437     UErrorCode status = U_ZERO_ERROR;
    438     Transliterator *t = Transliterator::createFromRules("<ID>",
    439                               UnicodeString("psch>Y;")
    440                               +"ps>y;"
    441                               +"ch>x;"
    442                               +"a>A;",
    443                               UTRANS_FORWARD, parseError,
    444                               status);
    445     if (U_FAILURE(status)) {
    446         errln("FAIL: RBT constructor failed");
    447         return;
    448     }
    449     const char* DATA[] = {
    450         // insertion, buffer
    451         "a", "A",
    452         "p", "Ap",
    453         "s", "Aps",
    454         "c", "Apsc",
    455         "a", "AycA",
    456         "psch", "AycAY",
    457         0, "AycAY", // null means finishKeyboardTransliteration
    458     };
    459 
    460     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    461     delete t;
    462 }
    463 
    464 /**
    465  * Basic test of keyboard with cursor.
    466  */
    467 void TransliteratorTest::TestKeyboard2(void) {
    468     UParseError parseError;
    469     UErrorCode status = U_ZERO_ERROR;
    470     Transliterator *t = Transliterator::createFromRules("<ID>",
    471                               UnicodeString("ych>Y;")
    472                               +"ps>|y;"
    473                               +"ch>x;"
    474                               +"a>A;",
    475                               UTRANS_FORWARD, parseError,
    476                               status);
    477     if (U_FAILURE(status)) {
    478         errln("FAIL: RBT constructor failed");
    479         return;
    480     }
    481     const char* DATA[] = {
    482         // insertion, buffer
    483         "a", "A",
    484         "p", "Ap",
    485         "s", "Aps", // modified for rollback - "Ay",
    486         "c", "Apsc", // modified for rollback - "Ayc",
    487         "a", "AycA",
    488         "p", "AycAp",
    489         "s", "AycAps", // modified for rollback - "AycAy",
    490         "c", "AycApsc", // modified for rollback - "AycAyc",
    491         "h", "AycAY",
    492         0, "AycAY", // null means finishKeyboardTransliteration
    493     };
    494 
    495     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    496     delete t;
    497 }
    498 
    499 /**
    500  * Test keyboard transliteration with back-replacement.
    501  */
    502 void TransliteratorTest::TestKeyboard3(void) {
    503     // We want th>z but t>y.  Furthermore, during keyboard
    504     // transliteration we want t>y then yh>z if t, then h are
    505     // typed.
    506     UnicodeString RULES("t>|y;"
    507                         "yh>z;");
    508 
    509     const char* DATA[] = {
    510         // Column 1: characters to add to buffer (as if typed)
    511         // Column 2: expected appearance of buffer after
    512         //           keyboard xliteration.
    513         "a", "a",
    514         "b", "ab",
    515         "t", "abt", // modified for rollback - "aby",
    516         "c", "abyc",
    517         "t", "abyct", // modified for rollback - "abycy",
    518         "h", "abycz",
    519         0, "abycz", // null means finishKeyboardTransliteration
    520     };
    521 
    522     UParseError parseError;
    523     UErrorCode status = U_ZERO_ERROR;
    524     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
    525     if (U_FAILURE(status)) {
    526         errln("FAIL: RBT constructor failed");
    527         return;
    528     }
    529     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
    530     delete t;
    531 }
    532 
    533 void TransliteratorTest::keyboardAux(const Transliterator& t,
    534                                      const char* DATA[], int32_t DATA_length) {
    535     UErrorCode status = U_ZERO_ERROR;
    536     UTransPosition index={0, 0, 0, 0};
    537     UnicodeString s;
    538     for (int32_t i=0; i<DATA_length; i+=2) {
    539         UnicodeString log;
    540         if (DATA[i] != 0) {
    541             log = s + " + "
    542                 + DATA[i]
    543                 + " -> ";
    544             t.transliterate(s, index, DATA[i], status);
    545         } else {
    546             log = s + " => ";
    547             t.finishTransliteration(s, index);
    548         }
    549         // Show the start index '{' and the cursor '|'
    550         UnicodeString a, b, c;
    551         s.extractBetween(0, index.contextStart, a);
    552         s.extractBetween(index.contextStart, index.start, b);
    553         s.extractBetween(index.start, s.length(), c);
    554         log.append(a).
    555             append((UChar)LEFT_BRACE).
    556             append(b).
    557             append((UChar)PIPE).
    558             append(c);
    559         if (s == DATA[i+1] && U_SUCCESS(status)) {
    560             logln(log);
    561         } else {
    562             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
    563         }
    564     }
    565 }
    566 
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented
 * out, so this test currently does nothing; the disabled code is kept
 * below for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
    599 
    600 /**
    601  * Compose the Kana transliterator forward and reverse and try
    602  * some strings that should come out unchanged.
    603  */
    604 void TransliteratorTest::TestCompoundKana(void) {
    605     UParseError parseError;
    606     UErrorCode status = U_ZERO_ERROR;
    607     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
    608     if (t == 0) {
    609         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
    610     } else {
    611         expect(*t, "aaaaa", "aaaaa");
    612         delete t;
    613     }
    614 }
    615 
    616 /**
    617  * Compose the hex transliterators forward and reverse.
    618  */
    619 void TransliteratorTest::TestCompoundHex(void) {
    620     UParseError parseError;
    621     UErrorCode status = U_ZERO_ERROR;
    622     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    623     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
    624     Transliterator* transab[] = { a, b };
    625     Transliterator* transba[] = { b, a };
    626     if (a == 0 || b == 0) {
    627         errln("FAIL: construction failed");
    628         delete a;
    629         delete b;
    630         return;
    631     }
    632     // Do some basic tests of a
    633     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
    634     // Do some basic tests of b
    635     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
    636 
    637     Transliterator* ab = new CompoundTransliterator(transab, 2);
    638     UnicodeString s("abcde", "");
    639     expect(*ab, s, s);
    640 
    641     UnicodeString str(s);
    642     a->transliterate(str);
    643     Transliterator* ba = new CompoundTransliterator(transba, 2);
    644     expect(*ba, str, str);
    645 
    646     delete ab;
    647     delete ba;
    648     delete a;
    649     delete b;
    650 }
    651 
    652 int gTestFilterClassID = 0;
    653 /**
    654  * Used by TestFiltering().
    655  */
    656 class TestFilter : public UnicodeFilter {
    657     virtual UnicodeFunctor* clone() const {
    658         return new TestFilter(*this);
    659     }
    660     virtual UBool contains(UChar32 c) const {
    661         return c != (UChar)0x0063 /*c*/;
    662     }
    663     // Stubs
    664     virtual UnicodeString& toPattern(UnicodeString& result,
    665                                      UBool /*escapeUnprintable*/) const {
    666         return result;
    667     }
    668     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
    669         return FALSE;
    670     }
    671     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
    672 public:
    673     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
    674 };
    675 
    676 /**
    677  * Do some basic tests of filtering.
    678  */
    679 void TransliteratorTest::TestFiltering(void) {
    680     UParseError parseError;
    681     UErrorCode status = U_ZERO_ERROR;
    682     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
    683     if (hex == 0) {
    684         errln("FAIL: createInstance(Any-Hex) failed");
    685         return;
    686     }
    687     hex->adoptFilter(new TestFilter());
    688     UnicodeString s("abcde");
    689     hex->transliterate(s);
    690     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
    691     if (s == exp) {
    692         logln(UnicodeString("Ok:   \"") + exp + "\"");
    693     } else {
    694         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
    695     }
    696 
    697     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
    698     UnicodeFilter *f = hex->orphanFilter();
    699     if (f == NULL){
    700         errln("FAIL: orphanFilter() should get a UnicodeFilter");
    701     } else {
    702         delete f;
    703     }
    704     delete hex;
    705 }
    706 
    707 /**
    708  * Test anchors
    709  */
    710 void TransliteratorTest::TestAnchors(void) {
    711     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
    712            "aaa",
    713            "012");
    714     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
    715            "aaa",
    716            "012");
    717     expect(UnicodeString("^ab  > 01 ;"
    718            " ab  > |8 ;"
    719            "  b  > k ;"
    720            " 8x$ > 45 ;"
    721            " 8x  > 77 ;", ""),
    722 
    723            "ababbabxabx",
    724            "018k7745");
    725     expect(UnicodeString("$s = [z$] ;"
    726            "$s{ab    > 01 ;"
    727            "   ab    > |8 ;"
    728            "    b    > k ;"
    729            "   8x}$s > 45 ;"
    730            "   8x    > 77 ;", ""),
    731 
    732            "abzababbabxzabxabx",
    733            "01z018k45z01x45");
    734 }
    735 
    736 /**
    737  * Test pattern quoting and escape mechanisms.
    738  */
    739 void TransliteratorTest::TestPatternQuoting(void) {
    740     // Array of 3n items
    741     // Each item is <rules>, <input>, <expected output>
    742     const UnicodeString DATA[] = {
    743         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
    744         UnicodeString(UChar(0x4E01)),
    745         "[male adult]"
    746     };
    747 
    748     for (int32_t i=0; i<3; i+=3) {
    749         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
    750         UParseError parseError;
    751         UErrorCode status = U_ZERO_ERROR;
    752         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    753         if (U_FAILURE(status)) {
    754             errln("RBT constructor failed");
    755         } else {
    756             expect(*t, DATA[i+1], DATA[i+2]);
    757         }
    758         delete t;
    759     }
    760 }
    761 
    762 /**
    763  * Regression test for bugs found in Greek transliteration.
    764  */
    765 void TransliteratorTest::TestJ277(void) {
    766     UErrorCode status = U_ZERO_ERROR;
    767     UParseError parseError;
    768     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
    769     if (gl == NULL) {
    770         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
    771         return;
    772     }
    773 
    774     UChar sigma = 0x3C3;
    775     UChar upsilon = 0x3C5;
    776     UChar nu = 0x3BD;
    777 //    UChar PHI = 0x3A6;
    778     UChar alpha = 0x3B1;
    779 //    UChar omega = 0x3C9;
    780 //    UChar omicron = 0x3BF;
    781 //    UChar epsilon = 0x3B5;
    782 
    783     // sigma upsilon nu -> syn
    784     UnicodeString syn;
    785     syn.append(sigma).append(upsilon).append(nu);
    786     expect(*gl, syn, "syn");
    787 
    788     // sigma alpha upsilon nu -> saun
    789     UnicodeString sayn;
    790     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
    791     expect(*gl, sayn, "saun");
    792 
    793     // Again, using a smaller rule set
    794     UnicodeString rules(
    795                 "$alpha   = \\u03B1;"
    796                 "$nu      = \\u03BD;"
    797                 "$sigma   = \\u03C3;"
    798                 "$ypsilon = \\u03C5;"
    799                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
    800                 "s <>           $sigma;"
    801                 "a <>           $alpha;"
    802                 "u <>  $vowel { $ypsilon;"
    803                 "y <>           $ypsilon;"
    804                 "n <>           $nu;",
    805                 "");
    806     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
    807     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    808     expect(*mini, syn, "syn");
    809     expect(*mini, sayn, "saun");
    810     delete mini;
    811     mini = NULL;
    812 
    813 #if !UCONFIG_NO_FORMATTING
    814     // Transliterate the Greek locale data
    815     Locale el("el");
    816     DateFormatSymbols syms(el, status);
    817     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    818     int32_t i, count;
    819     const UnicodeString* data = syms.getMonths(count);
    820     for (i=0; i<count; ++i) {
    821         if (data[i].length() == 0) {
    822             continue;
    823         }
    824         UnicodeString out(data[i]);
    825         gl->transliterate(out);
    826         UBool ok = TRUE;
    827         if (data[i].length() >= 2 && out.length() >= 2 &&
    828             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
    829             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
    830                 ok = FALSE;
    831             }
    832         }
    833         if (ok) {
    834             logln(prettify(data[i] + " -> " + out));
    835         } else {
    836             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
    837         }
    838     }
    839 #endif
    840 
    841     delete gl;
    842 }
    843 
    844 /**
    845  * Prefix, suffix support in hex transliterators
    846  */
    847 void TransliteratorTest::TestJ243(void) {
    848     UErrorCode ec = U_ZERO_ERROR;
    849 
    850     // Test default Hex-Any, which should handle
    851     // \u, \U, u+, and U+
    852     Transliterator *hex =
    853         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
    854     if (assertSuccess("getInstance", ec)) {
    855         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
    856     }
    857     delete hex;
    858 
    859 //    // Try a custom Hex-Unicode
    860 //    // \uXXXX and &#xXXXX;
    861 //    ec = U_ZERO_ERROR;
    862 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
    863 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
    864 //           "abcd5fx012&#x00033;");
    865 //    // Try custom Any-Hex (default is tested elsewhere)
    866 //    ec = U_ZERO_ERROR;
    867 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
    868 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
    869 }
    870 
    871 /**
    872  * Parsers need better syntax error messages.
    873  */
    874 void TransliteratorTest::TestJ329(void) {
    875 
    876     struct { UBool containsErrors; const char* rule; } DATA[] = {
    877         { FALSE, "a > b; c > d" },
    878         { TRUE,  "a > b; no operator; c > d" },
    879     };
    880     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
    881 
    882     for (int32_t i=0; i<DATA_length; ++i) {
    883         UErrorCode status = U_ZERO_ERROR;
    884         UParseError parseError;
    885         Transliterator *rbt = Transliterator::createFromRules("<ID>",
    886                                     DATA[i].rule,
    887                                     UTRANS_FORWARD,
    888                                     parseError,
    889                                     status);
    890         UBool gotError = U_FAILURE(status);
    891         UnicodeString desc(DATA[i].rule);
    892         desc.append(gotError ? " -> error" : " -> no error");
    893         if (gotError) {
    894             desc = desc + ", ParseError code=" + u_errorName(status) +
    895                 " line=" + parseError.line +
    896                 " offset=" + parseError.offset +
    897                 " context=" + parseError.preContext;
    898         }
    899         if (gotError == DATA[i].containsErrors) {
    900             logln(UnicodeString("Ok:   ") + desc);
    901         } else {
    902             errln(UnicodeString("FAIL: ") + desc);
    903         }
    904         delete rbt;
    905     }
    906 }
    907 
    908 /**
    909  * Test segments and segment references.
    910  */
    911 void TransliteratorTest::TestSegments(void) {
    912     // Array of 3n items
    913     // Each item is <rules>, <input>, <expected output>
    914     UnicodeString DATA[] = {
    915         "([a-z]) '.' ([0-9]) > $2 '-' $1",
    916         "abc.123.xyz.456",
    917         "ab1-c23.xy4-z56",
    918 
    919         // nested
    920         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
    921         "a1 b2",
    922         "a1.a.1 b2.b.2",
    923     };
    924     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    925 
    926     for (int32_t i=0; i<DATA_length; i+=3) {
    927         logln("Pattern: " + prettify(DATA[i]));
    928         UParseError parseError;
    929         UErrorCode status = U_ZERO_ERROR;
    930         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
    931         if (U_FAILURE(status)) {
    932             errln("FAIL: RBT constructor");
    933         } else {
    934             expect(*t, DATA[i+1], DATA[i+2]);
    935         }
    936         delete t;
    937     }
    938 }
    939 
    940 /**
    941  * Test cursor positioning outside of the key
    942  */
    943 void TransliteratorTest::TestCursorOffset(void) {
    944     // Array of 3n items
    945     // Each item is <rules>, <input>, <expected output>
    946     UnicodeString DATA[] = {
    947         "pre {alpha} post > | @ ALPHA ;"
    948         "eALPHA > beta ;"
    949         "pre {beta} post > BETA @@ | ;"
    950         "post > xyz",
    951 
    952         "prealphapost prebetapost",
    953 
    954         "prbetaxyz preBETApost",
    955     };
    956     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    957 
    958     for (int32_t i=0; i<DATA_length; i+=3) {
    959         logln("Pattern: " + prettify(DATA[i]));
    960         UParseError parseError;
    961         UErrorCode status = U_ZERO_ERROR;
    962         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
    963         if (U_FAILURE(status)) {
    964             errln("FAIL: RBT constructor");
    965         } else {
    966             expect(*t, DATA[i+1], DATA[i+2]);
    967         }
    968         delete t;
    969     }
    970 }
    971 
    972 /**
    973  * Test zero length and > 1 char length variable values.  Test
    974  * use of variable refs in UnicodeSets.
    975  */
    976 void TransliteratorTest::TestArbitraryVariableValues(void) {
    977     // Array of 3n items
    978     // Each item is <rules>, <input>, <expected output>
    979     UnicodeString DATA[] = {
    980         "$abe = ab;"
    981         "$pat = x[yY]z;"
    982         "$ll  = 'a-z';"
    983         "$llZ = [$ll];"
    984         "$llY = [$ll$pat];"
    985         "$emp = ;"
    986 
    987         "$abe > ABE;"
    988         "$pat > END;"
    989         "$llZ > 1;"
    990         "$llY > 2;"
    991         "7$emp 8 > 9;"
    992         "",
    993 
    994         "ab xYzxyz stY78",
    995         "ABE ENDEND 1129",
    996     };
    997     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
    998 
    999     for (int32_t i=0; i<DATA_length; i+=3) {
   1000         logln("Pattern: " + prettify(DATA[i]));
   1001         UParseError parseError;
   1002         UErrorCode status = U_ZERO_ERROR;
   1003         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
   1004         if (U_FAILURE(status)) {
   1005             errln("FAIL: RBT constructor");
   1006         } else {
   1007             expect(*t, DATA[i+1], DATA[i+2]);
   1008         }
   1009         delete t;
   1010     }
   1011 }
   1012 
   1013 /**
   1014  * Confirm that the contextStart, contextLimit, start, and limit
   1015  * behave correctly. J474.
   1016  */
   1017 void TransliteratorTest::TestPositionHandling(void) {
   1018     // Array of 3n items
   1019     // Each item is <rules>, <input>, <expected output>
   1020     const char* DATA[] = {
   1021         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
   1022         "xtat txtb", // pos 0,9,0,9
   1023         "xTTaSS TTxUUb",
   1024 
   1025         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1026         "xtat txtb", // pos 2,9,3,8
   1027         "xtaSS TTxUUb",
   1028 
   1029         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
   1030         "xtat txtb", // pos 3,8,3,8
   1031         "xtaTT TTxTTb",
   1032     };
   1033 
   1034     // Array of 4n positions -- these go with the DATA array
   1035     // They are: contextStart, contextLimit, start, limit
   1036     int32_t POS[] = {
   1037         0, 9, 0, 9,
   1038         2, 9, 3, 8,
   1039         3, 8, 3, 8,
   1040     };
   1041 
   1042     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
   1043     for (int32_t i=0; i<n; i++) {
   1044         UErrorCode status = U_ZERO_ERROR;
   1045         UParseError parseError;
   1046         Transliterator *t = Transliterator::createFromRules("<ID>",
   1047                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
   1048         if (U_FAILURE(status)) {
   1049             delete t;
   1050             errln("FAIL: RBT constructor");
   1051             return;
   1052         }
   1053         UTransPosition pos;
   1054         pos.contextStart= POS[4*i];
   1055         pos.contextLimit = POS[4*i+1];
   1056         pos.start = POS[4*i+2];
   1057         pos.limit = POS[4*i+3];
   1058         UnicodeString rsource(DATA[3*i+1]);
   1059         t->transliterate(rsource, pos, status);
   1060         if (U_FAILURE(status)) {
   1061             delete t;
   1062             errln("FAIL: transliterate");
   1063             return;
   1064         }
   1065         t->finishTransliteration(rsource, pos);
   1066         expectAux(DATA[3*i],
   1067                   DATA[3*i+1],
   1068                   rsource,
   1069                   DATA[3*i+2]);
   1070         delete t;
   1071     }
   1072 }
   1073 
   1074 /**
   1075  * Test the Hiragana-Katakana transliterator.
   1076  */
   1077 void TransliteratorTest::TestHiraganaKatakana(void) {
   1078     UParseError parseError;
   1079     UErrorCode status = U_ZERO_ERROR;
   1080     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
   1081     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
   1082     if (hk == 0 || kh == 0) {
   1083         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1084         delete hk;
   1085         delete kh;
   1086         return;
   1087     }
   1088 
   1089     // Array of 3n items
   1090     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
   1091     const char* DATA[] = {
   1092         "both",
   1093         "\\u3042\\u3090\\u3099\\u3092\\u3050",
   1094         "\\u30A2\\u30F8\\u30F2\\u30B0",
   1095 
   1096         "kh",
   1097         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
   1098         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
   1099     };
   1100     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1101 
   1102     for (int32_t i=0; i<DATA_length; i+=3) {
   1103         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   1104         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
   1105         switch (*DATA[i]) {
   1106         case 0x68: //'h': // Hiragana-Katakana
   1107             expect(*hk, h, k);
   1108             break;
   1109         case 0x6B: //'k': // Katakana-Hiragana
   1110             expect(*kh, k, h);
   1111             break;
   1112         case 0x62: //'b': // both
   1113             expect(*hk, h, k);
   1114             expect(*kh, k, h);
   1115             break;
   1116         }
   1117     }
   1118     delete hk;
   1119     delete kh;
   1120 }
   1121 
   1122 /**
   1123  * Test cloning / copy constructor of RBT.
   1124  */
   1125 void TransliteratorTest::TestCopyJ476(void) {
   1126     // The real test here is what happens when the destructors are
   1127     // called.  So we let one object get destructed, and check to
   1128     // see that its copy still works.
   1129     Transliterator *t2 = 0;
   1130     {
   1131         UParseError parseError;
   1132         UErrorCode status = U_ZERO_ERROR;
   1133         Transliterator *t1 = Transliterator::createFromRules("t1",
   1134             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
   1135         if (U_FAILURE(status)) {
   1136             errln("FAIL: RBT constructor");
   1137             return;
   1138         }
   1139         t2 = t1->clone(); // Call copy constructor under the covers.
   1140         expect(*t1, "abcfoofoo", "ABcbar");
   1141         delete t1;
   1142     }
   1143     expect(*t2, "abcfoofoo", "ABcbar");
   1144     delete t2;
   1145 }
   1146 
   1147 /**
   1148  * Test inter-Indic transliterators.  These are composed.
   1149  * ICU4C Jitterbug 483.
   1150  */
   1151 void TransliteratorTest::TestInterIndic(void) {
   1152     UnicodeString ID("Devanagari-Gujarati", "");
   1153     UErrorCode status = U_ZERO_ERROR;
   1154     UParseError parseError;
   1155     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1156     if (dg == 0) {
   1157         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
   1158         return;
   1159     }
   1160     UnicodeString id = dg->getID();
   1161     if (id != ID) {
   1162         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
   1163     }
   1164     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
   1165     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
   1166     expect(*dg, dev, guj);
   1167     delete dg;
   1168 }
   1169 
   1170 /**
   1171  * Test filter syntax in IDs. (J918)
   1172  */
   1173 void TransliteratorTest::TestFilterIDs(void) {
   1174     // Array of 3n strings:
   1175     // <id>, <inverse id>, <input>, <expected output>
   1176     const char* DATA[] = {
   1177         "[aeiou]Any-Hex", // ID
   1178         "[aeiou]Hex-Any", // expected inverse ID
   1179         "quizzical",      // src
   1180         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
   1181 
   1182         "[aeiou]Any-Hex;[^5]Hex-Any",
   1183         "[^5]Any-Hex;[aeiou]Hex-Any",
   1184         "quizzical",
   1185         "q\\u0075izzical",
   1186 
   1187         "[abc]Null",
   1188         "[abc]Null",
   1189         "xyz",
   1190         "xyz",
   1191     };
   1192     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
   1193 
   1194     for (int i=0; i<DATA_length; i+=4) {
   1195         UnicodeString ID(DATA[i], "");
   1196         UnicodeString uID(DATA[i+1], "");
   1197         UnicodeString data2(DATA[i+2], "");
   1198         UnicodeString data3(DATA[i+3], "");
   1199         UParseError parseError;
   1200         UErrorCode status = U_ZERO_ERROR;
   1201         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
   1202         if (t == 0) {
   1203             errln("FAIL: createInstance(" + ID + ") returned NULL");
   1204             return;
   1205         }
   1206         expect(*t, data2, data3);
   1207 
   1208         // Check the ID
   1209         if (ID != t->getID()) {
   1210             errln("FAIL: createInstance(" + ID + ").getID() => " +
   1211                   t->getID());
   1212         }
   1213 
   1214         // Check the inverse
   1215         Transliterator *u = t->createInverse(status);
   1216         if (u == 0) {
   1217             errln("FAIL: " + ID + ".createInverse() returned NULL");
   1218         } else if (u->getID() != uID) {
   1219             errln("FAIL: " + ID + ".createInverse().getID() => " +
   1220                   u->getID() + ", expected " + uID);
   1221         }
   1222 
   1223         delete t;
   1224         delete u;
   1225     }
   1226 }
   1227 
   1228 /**
   1229  * Test the case mapping transliterators.
   1230  */
   1231 void TransliteratorTest::TestCaseMap(void) {
   1232     UParseError parseError;
   1233     UErrorCode status = U_ZERO_ERROR;
   1234     Transliterator* toUpper =
   1235         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1236     Transliterator* toLower =
   1237         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1238     Transliterator* toTitle =
   1239         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
   1240     if (toUpper==0 || toLower==0 || toTitle==0) {
   1241         errln("FAIL: createInstance returned NULL");
   1242         delete toUpper;
   1243         delete toLower;
   1244         delete toTitle;
   1245         return;
   1246     }
   1247 
   1248     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
   1249            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
   1250     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
   1251            "the quick brown foX jumped over the lazY dogs.");
   1252     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
   1253            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
   1254 
   1255     delete toUpper;
   1256     delete toLower;
   1257     delete toTitle;
   1258 }
   1259 
   1260 /**
   1261  * Test the name mapping transliterators.
   1262  */
   1263 void TransliteratorTest::TestNameMap(void) {
   1264     UParseError parseError;
   1265     UErrorCode status = U_ZERO_ERROR;
   1266     Transliterator* uni2name =
   1267         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
   1268     Transliterator* name2uni =
   1269         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
   1270     if (uni2name==0 || name2uni==0) {
   1271         errln("FAIL: createInstance returned NULL");
   1272         delete uni2name;
   1273         delete name2uni;
   1274         return;
   1275     }
   1276 
   1277     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1278     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
   1279            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
   1280     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
   1281            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
   1282 
   1283     delete uni2name;
   1284     delete name2uni;
   1285 
   1286     // round trip
   1287     Transliterator* t =
   1288         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
   1289     if (t==0) {
   1290         errln("FAIL: createInstance returned NULL");
   1291         delete t;
   1292         return;
   1293     }
   1294 
   1295     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
   1296     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
   1297     expect(*t, s, s);
   1298     delete t;
   1299 }
   1300 
   1301 /**
   1302  * Test liberalized ID syntax.  1006c
   1303  */
   1304 void TransliteratorTest::TestLiberalizedID(void) {
   1305     // Some test cases have an expected getID() value of NULL.  This
   1306     // means I have disabled the test case for now.  This stuff is
   1307     // still under development, and I haven't decided whether to make
   1308     // getID() return canonical case yet.  It will all get rewritten
   1309     // with the move to Source-Target/Variant IDs anyway. [aliu]
   1310     const char* DATA[] = {
   1311         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
   1312         "  Null  ", "Null", "whitespace",
   1313         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
   1314         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
   1315     };
   1316     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   1317     UParseError parseError;
   1318     UErrorCode status= U_ZERO_ERROR;
   1319     for (int32_t i=0; i<DATA_length; i+=3) {
   1320         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
   1321         if (t == 0) {
   1322             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
   1323                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
   1324         } else {
   1325             UnicodeString exp;
   1326             if (DATA[i+1]) {
   1327                 exp = UnicodeString(DATA[i+1], "");
   1328             }
   1329             // Don't worry about getID() if the expected char*
   1330             // is NULL -- see above.
   1331             if (exp.length() == 0 || exp == t->getID()) {
   1332                 logln(UnicodeString("Ok: ") + DATA[i+2] +
   1333                       " create ID \"" + DATA[i] + "\" => \"" +
   1334                       exp + "\"");
   1335             } else {
   1336                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
   1337                       " create ID \"" + DATA[i] + "\" => \"" +
   1338                       t->getID() + "\", exp \"" + exp + "\"");
   1339             }
   1340             delete t;
   1341         }
   1342     }
   1343 }
   1344 
   1345 /* test for Jitterbug 912 */
   1346 void TransliteratorTest::TestCreateInstance(){
   1347     const char* FORWARD = "F";
   1348     const char* REVERSE = "R";
   1349     const char* DATA[] = {
   1350         // Column 1: id
   1351         // Column 2: direction
   1352         // Column 3: expected ID, or "" if expect failure
   1353         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
   1354 
   1355         // JB#2689: bad compound causes crash
   1356         "InvalidSource-InvalidTarget", FORWARD, "",
   1357         "InvalidSource-InvalidTarget", REVERSE, "",
   1358         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
   1359         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
   1360         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
   1361         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
   1362 
   1363         NULL
   1364     };
   1365 
   1366     for (int32_t i=0; DATA[i]; i+=3) {
   1367         UParseError err;
   1368         UErrorCode ec = U_ZERO_ERROR;
   1369         UnicodeString id(DATA[i]);
   1370         UTransDirection dir = (DATA[i+1]==FORWARD)?
   1371             UTRANS_FORWARD:UTRANS_REVERSE;
   1372         UnicodeString expID(DATA[i+2]);
   1373         Transliterator* t =
   1374             Transliterator::createInstance(id,dir,err,ec);
   1375         UnicodeString newID;
   1376         if (t) {
   1377             newID = t->getID();
   1378         }
   1379         UBool ok = (newID == expID);
   1380         if (!t) {
   1381             newID = u_errorName(ec);
   1382         }
   1383         if (ok) {
   1384             logln((UnicodeString)"Ok: createInstance(" +
   1385                   id + "," + DATA[i+1] + ") => " + newID);
   1386         } else {
   1387             dataerrln((UnicodeString)"FAIL: createInstance(" +
   1388                   id + "," + DATA[i+1] + ") => " + newID +
   1389                   ", expected " + expID);
   1390         }
   1391         delete t;
   1392     }
   1393 }
   1394 
   1395 /**
   1396  * Test the normalization transliterator.
   1397  */
   1398 void TransliteratorTest::TestNormalizationTransliterator() {
   1399     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
   1400     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
   1401     const char* CANON[] = {
   1402         // Input               Decomposed            Composed
   1403         "cat",                "cat",                "cat"               ,
   1404         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
   1405 
   1406         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
   1407         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
   1408 
   1409         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
   1410         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
   1411         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
   1412 
   1413         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
   1414         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
   1415 
   1416         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
   1417         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
   1418         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
   1419 
   1420         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
   1421         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
   1422 
   1423         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
   1424         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
   1425 
   1426         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1427         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
   1428 
   1429         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1430         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1431         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
   1432         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
   1433         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
   1434 
   1435         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
   1436         0 // end
   1437     };
   1438 
   1439     const char* COMPAT[] = {
   1440         // Input               Decomposed            Composed
   1441         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
   1442 
   1443         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
   1444         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
   1445 
   1446         "Henry IV",           "Henry IV",           "Henry IV"          ,
   1447         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
   1448 
   1449         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
   1450         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
   1451 
   1452         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
   1453         0 // end
   1454     };
   1455 
   1456     int32_t i;
   1457     UParseError parseError;
   1458     UErrorCode status = U_ZERO_ERROR;
   1459     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
   1460     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
   1461     if (!NFD || !NFC) {
   1462         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
   1463         delete NFD;
   1464         delete NFC;
   1465         return;
   1466     }
   1467     for (i=0; CANON[i]; i+=3) {
   1468         UnicodeString in = CharsToUnicodeString(CANON[i]);
   1469         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
   1470         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
   1471         expect(*NFD, in, expd);
   1472         expect(*NFC, in, expc);
   1473     }
   1474     delete NFD;
   1475     delete NFC;
   1476 
   1477     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
   1478     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
   1479     if (!NFKD || !NFKC) {
   1480         errln("FAIL: createInstance failed");
   1481         delete NFKD;
   1482         delete NFKC;
   1483         return;
   1484     }
   1485     for (i=0; COMPAT[i]; i+=3) {
   1486         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
   1487         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
   1488         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
   1489         expect(*NFKD, in, expkd);
   1490         expect(*NFKC, in, expkc);
   1491     }
   1492     delete NFKD;
   1493     delete NFKC;
   1494 
   1495     UParseError pe;
   1496     status = U_ZERO_ERROR;
   1497     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
   1498                                                        UTRANS_FORWARD,
   1499                                                        pe, status);
   1500     if (t == 0) {
   1501         errln("FAIL: createInstance failed");
   1502     }
   1503     expect(*t, CharsToUnicodeString("\\u010dx"),
   1504            CharsToUnicodeString("c\\u030C"));
   1505     delete t;
   1506 }
   1507 
   1508 /**
   1509  * Test compound RBT rules.
   1510  */
   1511 void TransliteratorTest::TestCompoundRBT(void) {
   1512     // Careful with spacing and ';' here:  Phrase this exactly
   1513     // as toRules() is going to return it.  If toRules() changes
   1514     // with regard to spacing or ';', then adjust this string.
   1515     UnicodeString rule("::Hex-Any;\n"
   1516                        "::Any-Lower;\n"
   1517                        "a > '.A.';\n"
   1518                        "b > '.B.';\n"
   1519                        "::[^t]Any-Upper;", "");
   1520     UParseError parseError;
   1521     UErrorCode status = U_ZERO_ERROR;
   1522     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
   1523     if (t == 0) {
   1524         errln("FAIL: createFromRules failed");
   1525         return;
   1526     }
   1527     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
   1528            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
   1529     UnicodeString r;
   1530     t->toRules(r, TRUE);
   1531     if (r == rule) {
   1532         logln((UnicodeString)"OK: toRules() => " + r);
   1533     } else {
   1534         errln((UnicodeString)"FAIL: toRules() => " + r +
   1535               ", expected " + rule);
   1536     }
   1537     delete t;
   1538 
   1539     // Now test toRules
   1540     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
   1541     if (t == 0) {
   1542         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1543         return;
   1544     }
   1545     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
   1546     t->toRules(r, TRUE);
   1547     if (r != exp) {
   1548         errln((UnicodeString)"FAIL: toRules() => " + r +
   1549               ", expected " + exp);
   1550     } else {
   1551         logln((UnicodeString)"OK: toRules() => " + r);
   1552     }
   1553     delete t;
   1554 
   1555     // Round trip the result of toRules
   1556     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
   1557     if (t == 0) {
   1558         errln("FAIL: createFromRules #2 failed");
   1559         return;
   1560     } else {
   1561         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
   1562     }
   1563 
   1564     // Test toRules again
   1565     t->toRules(r, TRUE);
   1566     if (r != exp) {
   1567         errln((UnicodeString)"FAIL: toRules() => " + r +
   1568               ", expected " + exp);
   1569     } else {
   1570         logln((UnicodeString)"OK: toRules() => " + r);
   1571     }
   1572 
   1573     delete t;
   1574 
   1575     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
   1576     // to what the regenerated ID will look like.
   1577     UnicodeString id("Upper(Lower);(NFKC)", "");
   1578     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
   1579     if (t == 0) {
   1580         errln("FAIL: createInstance #2 failed");
   1581         return;
   1582     }
   1583     if (t->getID() == id) {
   1584         logln((UnicodeString)"OK: created " + id);
   1585     } else {
   1586         errln((UnicodeString)"FAIL: createInstance(" + id +
   1587               ").getID() => " + t->getID());
   1588     }
   1589 
   1590     Transliterator *u = t->createInverse(status);
   1591     if (u == 0) {
   1592         errln("FAIL: createInverse failed");
   1593         delete t;
   1594         return;
   1595     }
   1596     exp = "NFKC();Lower(Upper)";
   1597     if (u->getID() == exp) {
   1598         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
   1599               u->getID());
   1600     } else {
   1601         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
   1602               u->getID());
   1603     }
   1604     delete t;
   1605     delete u;
   1606 }
   1607 
   1608 /**
   1609  * Compound filter semantics were orginially not implemented
   1610  * correctly.  Originally, each component filter f(i) is replaced by
   1611  * f'(i) = f(i) && g, where g is the filter for the compound
   1612  * transliterator.
   1613  *
   1614  * From Mark:
   1615  *
   1616  * Suppose and I have a transliterator X. Internally X is
   1617  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
   1618  *
   1619  * The compound should convert all greek characters (through latin) to
   1620  * cyrillic, then lowercase the result. The filter should say "don't
   1621  * touch 'A' in the original". But because an intermediate result
   1622  * happens to go through "A", the Greek Alpha gets hung up.
   1623  */
   1624 void TransliteratorTest::TestCompoundFilter(void) {
   1625     UParseError parseError;
   1626     UErrorCode status = U_ZERO_ERROR;
   1627     Transliterator *t = Transliterator::createInstance
   1628         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
   1629     if (t == 0) {
   1630         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   1631         return;
   1632     }
   1633     t->adoptFilter(new UnicodeSet("[^A]", status));
   1634     if (U_FAILURE(status)) {
   1635         errln("FAIL: UnicodeSet ct failed");
   1636         delete t;
   1637         return;
   1638     }
   1639 
   1640     // Only the 'A' at index 1 should remain unchanged
   1641     expect(*t,
   1642            CharsToUnicodeString("BA\\u039A\\u0391"),
   1643            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
   1644     delete t;
   1645 }
   1646 
   1647 void TransliteratorTest::TestRemove(void) {
   1648     UParseError parseError;
   1649     UErrorCode status = U_ZERO_ERROR;
   1650     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
   1651     if (t == 0) {
   1652         errln("FAIL: createInstance failed");
   1653         return;
   1654     }
   1655 
   1656     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
   1657 
   1658     // extra test for RemoveTransliterator::clone(), which at one point wasn't
   1659     // duplicating the filter
   1660     Transliterator* t2 = t->clone();
   1661     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
   1662 
   1663     delete t;
   1664     delete t2;
   1665 }
   1666 
   1667 void TransliteratorTest::TestToRules(void) {
   1668     const char* RBT = "rbt";
   1669     const char* SET = "set";
   1670     static const char* DATA[] = {
   1671         RBT,
   1672         "$a=\\u4E61; [$a] > A;",
   1673         "[\\u4E61] > A;",
   1674 
   1675         RBT,
   1676         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
   1677         "[[:Zs:][:Zl:]]{a} > A;",
   1678 
   1679         SET,
   1680         "[[:Zs:][:Zl:]]",
   1681         "[[:Zs:][:Zl:]]",
   1682 
   1683         SET,
   1684         "[:Ps:]",
   1685         "[:Ps:]",
   1686 
   1687         SET,
   1688         "[:L:]",
   1689         "[:L:]",
   1690 
   1691         SET,
   1692         "[[:L:]-[A]]",
   1693         "[[:L:]-[A]]",
   1694 
   1695         SET,
   1696         "[~[:Lu:][:Ll:]]",
   1697         "[~[:Lu:][:Ll:]]",
   1698 
   1699         SET,
   1700         "[~[a-z]]",
   1701         "[~[a-z]]",
   1702 
   1703         RBT,
   1704         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
   1705         "[^[:Zs:]]{a} > A;",
   1706 
   1707         RBT,
   1708         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
   1709         "[[a-z]-[:Zs:]]{a} > A;",
   1710 
   1711         RBT,
   1712         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
   1713         "[[:Zs:]&[a-z]]{a} > A;",
   1714 
   1715         RBT,
   1716         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
   1717         "[x[:Zs:]]{a} > A;",
   1718 
   1719         RBT,
   1720         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
   1721         "$macron = \\u0304 ;"
   1722         "$evowel = [aeiouyAEIOUY] ;"
   1723         "$iotasub = \\u0345 ;"
   1724         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
   1725         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
   1726 
   1727         RBT,
   1728         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
   1730     };
   1731     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   1732 
   1733     for (int32_t d=0; d < DATA_length; d+=3) {
   1734         if (DATA[d] == RBT) {
   1735             // Transliterator test
   1736             UParseError parseError;
   1737             UErrorCode status = U_ZERO_ERROR;
   1738             Transliterator *t = Transliterator::createFromRules("ID",
   1739                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
   1740             if (t == 0) {
   1741                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
   1742                 return;
   1743             }
   1744             UnicodeString rules, escapedRules;
   1745             t->toRules(rules, FALSE);
   1746             t->toRules(escapedRules, TRUE);
   1747             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
   1748             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
   1749             if (rules == expRules) {
   1750                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1751                       " => " + rules);
   1752             } else {
   1753                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1754                       " => " + rules + ", exp " + expRules);
   1755             }
   1756             if (escapedRules == expEscapedRules) {
   1757                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1758                       " => " + escapedRules);
   1759             } else {
   1760                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
   1761                       " => " + escapedRules + ", exp " + expEscapedRules);
   1762             }
   1763             delete t;
   1764 
   1765         } else {
   1766             // UnicodeSet test
   1767             UErrorCode status = U_ZERO_ERROR;
   1768             UnicodeString pat(DATA[d+1], -1, US_INV);
   1769             UnicodeString expToPat(DATA[d+2], -1, US_INV);
   1770             UnicodeSet set(pat, status);
   1771             if (U_FAILURE(status)) {
   1772                 errln("FAIL: UnicodeSet ct failed");
   1773                 return;
   1774             }
   1775             // Adjust spacing etc. as necessary.
   1776             UnicodeString toPat;
   1777             set.toPattern(toPat);
   1778             if (expToPat == toPat) {
   1779                 logln((UnicodeString)"Ok: " + pat +
   1780                       " => " + toPat);
   1781             } else {
   1782                 errln((UnicodeString)"FAIL: " + pat +
   1783                       " => " + prettify(toPat, TRUE) +
   1784                       ", exp " + prettify(pat, TRUE));
   1785             }
   1786         }
   1787     }
   1788 }
   1789 
   1790 void TransliteratorTest::TestContext() {
   1791     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
   1792     expect("de > x; {d}e > y;",
   1793            "de",
   1794            "ye",
   1795            &pos);
   1796 
   1797     expect("ab{c} > z;",
   1798            "xadabdabcy",
   1799            "xadabdabzy");
   1800 }
   1801 
   1802 void TransliteratorTest::TestSupplemental() {
   1803 
   1804     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
   1805                                 "a > $a; $s > i;"),
   1806            CharsToUnicodeString("ab\\U0001030Fx"),
   1807            CharsToUnicodeString("\\U00010300bix"));
   1808 
   1809     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
   1810                                 "$b=[A-Z\\U00010400-\\U0001044D];"
   1811                                 "($a)($b) > $2 $1;"),
   1812            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
   1813            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
   1814 
   1815     // k|ax\\U00010300xm
   1816 
   1817     // k|a\\U00010400\\U00010300xm
   1818     // ky|\\U00010400\\U00010300xm
   1819     // ky\\U00010400|\\U00010300xm
   1820 
   1821     // ky\\U00010400|\\U00010300\\U00010400m
   1822     // ky\\U00010400y|\\U00010400m
   1823     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
   1824                                 "$a {x} > | @ \\U00010400;"
   1825                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
   1826            CharsToUnicodeString("kax\\U00010300xm"),
   1827            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
   1828 
   1829     expectT("Any-Name",
   1830            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
   1831            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
   1832 
   1833     expectT("Any-Hex/Unicode",
   1834            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1835            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
   1836 
   1837     expectT("Any-Hex/C",
   1838            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1839            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
   1840 
   1841     expectT("Any-Hex/Perl",
   1842            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1843            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
   1844 
   1845     expectT("Any-Hex/Java",
   1846            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1847            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
   1848 
   1849     expectT("Any-Hex/XML",
   1850            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1851            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
   1852 
   1853     expectT("Any-Hex/XML10",
   1854            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1855            "&#66352;&#1113856;&#917601;&#160;");
   1856 
   1857     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
   1858            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
   1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
   1860 }
   1861 
   1862 void TransliteratorTest::TestQuantifier() {
   1863 
   1864     // Make sure @ in a quantified anteContext works
   1865     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
   1866            "AAAAAb",
   1867            "aaa(aac)");
   1868 
   1869     // Make sure @ in a quantified postContext works
   1870     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
   1871            "baaaaa",
   1872            "caa(aaa)");
   1873 
   1874     // Make sure @ in a quantified postContext with seg ref works
   1875     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
   1876            "baaaaa",
   1877            "baa(aaa)");
   1878 
   1879     // Make sure @ past ante context doesn't enter ante context
   1880     UTransPosition pos = {0, 5, 3, 5};
   1881     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
   1882            "xxxab",
   1883            "xxx(ac)",
   1884            &pos);
   1885 
   1886     // Make sure @ past post context doesn't pass limit
   1887     UTransPosition pos2 = {0, 4, 0, 2};
   1888     expect("{b} a+ > c @@ |; x > y; a > A;",
   1889            "baxx",
   1890            "caxx",
   1891            &pos2);
   1892 
   1893     // Make sure @ past post context doesn't enter post context
   1894     expect("{b} a+ > c @@ |; x > y; a > A;",
   1895            "baxx",
   1896            "cayy");
   1897 
   1898     expect("(ab)? c > d;",
   1899            "c abc ababc",
   1900            "d d abd");
   1901 
   1902     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1903     // not the full sequence of them.  This accords with perl behavior.
   1904     expect("(ab)+ {x} > '(' $1 ')';",
   1905            "x abx ababxy",
   1906            "x ab(ab) abab(ab)y");
   1907 
   1908     expect("b+ > x;",
   1909            "ac abc abbc abbbc",
   1910            "ac axc axc axc");
   1911 
   1912     expect("[abc]+ > x;",
   1913            "qac abrc abbcs abtbbc",
   1914            "qx xrx xs xtx");
   1915 
   1916     expect("q{(ab)+} > x;",
   1917            "qa qab qaba qababc qaba",
   1918            "qa qx qxa qxc qxa");
   1919 
   1920     expect("q(ab)* > x;",
   1921            "qa qab qaba qababc",
   1922            "xa x xa xc");
   1923 
   1924     // NOTE: The (ab)+ when referenced just yields a single "ab",
   1925     // not the full sequence of them.  This accords with perl behavior.
   1926     expect("q(ab)* > '(' $1 ')';",
   1927            "qa qab qaba qababc",
   1928            "()a (ab) (ab)a (ab)c");
   1929 
   1930     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
   1931     // quoted string
   1932     expect("'ab'+ > x;",
   1933            "bb ab ababb",
   1934            "bb x xb");
   1935 
   1936     // $foo+ and $foo* -- the quantifier should apply to the entire
   1937     // variable reference
   1938     expect("$var = ab; $var+ > x;",
   1939            "bb ab ababb",
   1940            "bb x xb");
   1941 }
   1942 
   1943 class TestTrans : public Transliterator {
   1944 public:
   1945     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
   1946     }
   1947     virtual Transliterator* clone(void) const {
   1948         return new TestTrans(getID());
   1949     }
   1950     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
   1951         UBool /*isIncremental*/) const
   1952     {
   1953         offsets.start = offsets.limit;
   1954     }
   1955     virtual UClassID getDynamicClassID() const;
   1956     static UClassID U_EXPORT2 getStaticClassID();
   1957 };
   1958 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
   1959 
   1960 /**
   1961  * Test Source-Target/Variant.
   1962  */
   1963 void TransliteratorTest::TestSTV(void) {
   1964     int32_t ns = Transliterator::countAvailableSources();
   1965     if (ns < 0 || ns > 255) {
   1966         errln((UnicodeString)"FAIL: Bad source count: " + ns);
   1967         return;
   1968     }
   1969     int32_t i, j;
   1970     for (i=0; i<ns; ++i) {
   1971         UnicodeString source;
   1972         Transliterator::getAvailableSource(i, source);
   1973         logln((UnicodeString)"" + i + ": " + source);
   1974         if (source.length() == 0) {
   1975             errln("FAIL: empty source");
   1976             continue;
   1977         }
   1978         int32_t nt = Transliterator::countAvailableTargets(source);
   1979         if (nt < 0 || nt > 255) {
   1980             errln((UnicodeString)"FAIL: Bad target count: " + nt);
   1981             continue;
   1982         }
   1983         for (int32_t j=0; j<nt; ++j) {
   1984             UnicodeString target;
   1985             Transliterator::getAvailableTarget(j, source, target);
   1986             logln((UnicodeString)" " + j + ": " + target);
   1987             if (target.length() == 0) {
   1988                 errln("FAIL: empty target");
   1989                 continue;
   1990             }
   1991             int32_t nv = Transliterator::countAvailableVariants(source, target);
   1992             if (nv < 0 || nv > 255) {
   1993                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
   1994                 continue;
   1995             }
   1996             for (int32_t k=0; k<nv; ++k) {
   1997                 UnicodeString variant;
   1998                 Transliterator::getAvailableVariant(k, source, target, variant);
   1999                 if (variant.length() == 0) {
   2000                     logln((UnicodeString)"  " + k + ": <empty>");
   2001                 } else {
   2002                     logln((UnicodeString)"  " + k + ": " + variant);
   2003                 }
   2004             }
   2005         }
   2006     }
   2007 
   2008     // Test registration
   2009     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2010     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
   2011     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
   2012     for (i=0; i<3; ++i) {
   2013         Transliterator *t = new TestTrans(IDS[i]);
   2014         if (t == 0) {
   2015             errln("FAIL: out of memory");
   2016             return;
   2017         }
   2018         if (t->getID() != IDS[i]) {
   2019             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
   2020             delete t;
   2021             return;
   2022         }
   2023         Transliterator::registerInstance(t);
   2024         UErrorCode status = U_ZERO_ERROR;
   2025         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2026         if (t == NULL) {
   2027             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
   2028                   IDS[i]);
   2029         } else {
   2030             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
   2031                   IDS[i]);
   2032             delete t;
   2033         }
   2034         Transliterator::unregister(IDS[i]);
   2035         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
   2036         if (t != NULL) {
   2037             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
   2038                   IDS[i]);
   2039             delete t;
   2040         }
   2041     }
   2042 
   2043     // Make sure getAvailable API reflects removal
   2044     int32_t n = Transliterator::countAvailableIDs();
   2045     for (i=0; i<n; ++i) {
   2046         UnicodeString id = Transliterator::getAvailableID(i);
   2047         for (j=0; j<3; ++j) {
   2048             if (id.caseCompare(FULL_IDS[j],0)==0) {
   2049                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
   2050             }
   2051         }
   2052     }
   2053     n = Transliterator::countAvailableTargets("Any");
   2054     for (i=0; i<n; ++i) {
   2055         UnicodeString t;
   2056         Transliterator::getAvailableTarget(i, "Any", t);
   2057         if (t.caseCompare(IDS[0],0)==0) {
   2058             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
   2059         }
   2060     }
   2061     n = Transliterator::countAvailableSources();
   2062     for (i=0; i<n; ++i) {
   2063         UnicodeString s;
   2064         Transliterator::getAvailableSource(i, s);
   2065         for (j=0; j<3; ++j) {
   2066             if (SOURCES[j] == NULL) continue;
   2067             if (s.caseCompare(SOURCES[j],0)==0) {
   2068                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
   2069             }
   2070         }
   2071     }
   2072 }
   2073 
   2074 /**
   2075  * Test inverse of Greek-Latin; Title()
   2076  */
   2077 void TransliteratorTest::TestCompoundInverse(void) {
   2078     UParseError parseError;
   2079     UErrorCode status = U_ZERO_ERROR;
   2080     Transliterator *t = Transliterator::createInstance
   2081         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
   2082     if (t == 0) {
   2083         dataerrln("FAIL: createInstance - %s", u_errorName(status));
   2084         return;
   2085     }
   2086     UnicodeString exp("(Title);Latin-Greek");
   2087     if (t->getID() == exp) {
   2088         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
   2089               t->getID());
   2090     } else {
   2091         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
   2092               t->getID() + "\", expected \"" + exp + "\"");
   2093     }
   2094     delete t;
   2095 }
   2096 
   2097 /**
   2098  * Test NFD chaining with RBT
   2099  */
   2100 void TransliteratorTest::TestNFDChainRBT() {
   2101     UParseError pe;
   2102     UErrorCode ec = U_ZERO_ERROR;
   2103     Transliterator* t = Transliterator::createFromRules(
   2104                                "TEST", "::NFD; aa > Q; a > q;",
   2105                                UTRANS_FORWARD, pe, ec);
   2106     if (t == NULL || U_FAILURE(ec)) {
   2107         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
   2108         return;
   2109     }
   2110     expect(*t, "aa", "Q");
   2111     delete t;
   2112 
   2113     // TEMPORARY TESTS -- BEING DEBUGGED
   2114 //=-    UnicodeString s, s2;
   2115 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
   2116 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2117 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
   2118 //=-    expect(*t, s, s2);
   2119 //=-    delete t;
   2120 //=-
   2121 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2122 //=-    expect(*t, s2, s);
   2123 //=-    delete t;
   2124 //=-
   2125 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
   2126 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
   2127 //=-    expect(*t, s, s);
   2128 //=-    delete t;
   2129 
   2130 //    const char* source[] = {
   2131 //        /*
   2132 //        "\\u015Br\\u012Bmad",
   2133 //        "bhagavadg\\u012Bt\\u0101",
   2134 //        "adhy\\u0101ya",
   2135 //        "arjuna",
   2136 //        "vi\\u1E63\\u0101da",
   2137 //        "y\\u014Dga",
   2138 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2139 //        "uv\\u0101cr\\u0325",
   2140 //        */
   2141 //        "rmk\\u1E63\\u0113t",
   2142 //      //"dharmak\\u1E63\\u0113tr\\u0113",
   2143 //        /*
   2144 //        "kuruk\\u1E63\\u0113tr\\u0113",
   2145 //        "samav\\u0113t\\u0101",
   2146 //        "yuyutsava-\\u1E25",
   2147 //        "m\\u0101mak\\u0101-\\u1E25",
   2148 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2149 //        "kimakurvata",
   2150 //        "san\\u0304java",
   2151 //        */
   2152 //
   2153 //        0
   2154 //    };
   2155 //    const char* expected[] = {
   2156 //        /*
   2157 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2158 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2159 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2160 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2161 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2162 //        "\\u092f\\u094b\\u0917",
   2163 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2164 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2165 //        */
   2166 //        "\\u0927",
   2167 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2168 //        /*
   2169 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2170 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2171 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2172 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2173 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2174 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2175 //        "\\u0938\\u0902\\u091c\\u0935",
   2176 //        */
   2177 //        0
   2178 //    };
   2179 //    UErrorCode status = U_ZERO_ERROR;
   2180 //    UParseError parseError;
   2181 //    UnicodeString message;
   2182 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2183 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2184 //    if(U_FAILURE(status)){
   2185 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2186 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
   2187 //        delete latinToDevToLatin;
   2188 //        delete devToLatinToDev;
   2189 //        return;
   2190 //    }
   2191 //    UnicodeString gotResult;
   2192 //    for(int i= 0; source[i] != 0; i++){
   2193 //        gotResult = source[i];
   2194 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2195 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2196 //    }
   2197 //    delete latinToDevToLatin;
   2198 //    delete devToLatinToDev;
   2199 }
   2200 
   2201 /**
   2202  * Inverse of "Null" should be "Null". (J21)
   2203  */
   2204 void TransliteratorTest::TestNullInverse() {
   2205     UParseError pe;
   2206     UErrorCode ec = U_ZERO_ERROR;
   2207     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
   2208     if (t == 0 || U_FAILURE(ec)) {
   2209         errln("FAIL: createInstance");
   2210         return;
   2211     }
   2212     Transliterator *u = t->createInverse(ec);
   2213     if (u == 0 || U_FAILURE(ec)) {
   2214         errln("FAIL: createInverse");
   2215         delete t;
   2216         return;
   2217     }
   2218     if (u->getID() != "Null") {
   2219         errln("FAIL: Inverse of Null should be Null");
   2220     }
   2221     delete t;
   2222     delete u;
   2223 }
   2224 
   2225 /**
   2226  * Check ID of inverse of alias. (J22)
   2227  */
   2228 void TransliteratorTest::TestAliasInverseID() {
   2229     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
   2230     UParseError pe;
   2231     UErrorCode ec = U_ZERO_ERROR;
   2232     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2233     if (t == 0 || U_FAILURE(ec)) {
   2234         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2235         return;
   2236     }
   2237     Transliterator *u = t->createInverse(ec);
   2238     if (u == 0 || U_FAILURE(ec)) {
   2239         errln("FAIL: createInverse");
   2240         delete t;
   2241         return;
   2242     }
   2243     UnicodeString exp = "Hangul-Latin";
   2244     UnicodeString got = u->getID();
   2245     if (got != exp) {
   2246         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2247               ", expected " + exp);
   2248     }
   2249     delete t;
   2250     delete u;
   2251 }
   2252 
   2253 /**
   2254  * Test IDs of inverses of compound transliterators. (J20)
   2255  */
   2256 void TransliteratorTest::TestCompoundInverseID() {
   2257     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
   2258     UParseError pe;
   2259     UErrorCode ec = U_ZERO_ERROR;
   2260     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   2261     if (t == 0 || U_FAILURE(ec)) {
   2262         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
   2263         return;
   2264     }
   2265     Transliterator *u = t->createInverse(ec);
   2266     if (u == 0 || U_FAILURE(ec)) {
   2267         errln("FAIL: createInverse");
   2268         delete t;
   2269         return;
   2270     }
   2271     UnicodeString exp = "NFD(NFC);Jamo-Latin";
   2272     UnicodeString got = u->getID();
   2273     if (got != exp) {
   2274         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
   2275               ", expected " + exp);
   2276     }
   2277     delete t;
   2278     delete u;
   2279 }
   2280 
   2281 /**
   2282  * Test undefined variable.
   2283 
   2284  */
   2285 void TransliteratorTest::TestUndefinedVariable() {
   2286     UnicodeString rule = "$initial } a <> \\u1161;";
   2287     UParseError pe;
   2288     UErrorCode ec = U_ZERO_ERROR;
   2289     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
   2290     delete t;
   2291     if (U_FAILURE(ec)) {
   2292         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
   2293               u_errorName(ec));
   2294         return;
   2295     }
   2296     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
   2297           u_errorName(ec));
   2298 }
   2299 
   2300 /**
   2301  * Test empty context.
   2302  */
   2303 void TransliteratorTest::TestEmptyContext() {
   2304     expect(" { a } > b;", "xay a ", "xby b ");
   2305 }
   2306 
   2307 /**
   2308 * Test compound filter ID syntax
   2309 */
   2310 void TransliteratorTest::TestCompoundFilterID(void) {
   2311     static const char* DATA[] = {
   2312         // Col. 1 = ID or rule set (latter must start with #)
   2313 
   2314         // = columns > 1 are null if expect col. 1 to be illegal =
   2315 
   2316         // Col. 2 = direction, "F..." or "R..."
   2317         // Col. 3 = source string
   2318         // Col. 4 = exp result
   2319 
   2320         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
   2321         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
   2322         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
   2323         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2324         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
   2325         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
   2326         NULL,
   2327     };
   2328 
   2329     for (int32_t i=0; DATA[i]; i+=4) {
   2330         UnicodeString id = CharsToUnicodeString(DATA[i]);
   2331         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
   2332             UTRANS_REVERSE : UTRANS_FORWARD;
   2333         UnicodeString source;
   2334         UnicodeString exp;
   2335         if (DATA[i+2] != NULL) {
   2336             source = CharsToUnicodeString(DATA[i+2]);
   2337             exp = CharsToUnicodeString(DATA[i+3]);
   2338         }
   2339         UBool expOk = (DATA[i+1] != NULL);
   2340         Transliterator* t = NULL;
   2341         UParseError pe;
   2342         UErrorCode ec = U_ZERO_ERROR;
   2343         if (id.charAt(0) == 0x23/*#*/) {
   2344             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
   2345         } else {
   2346             t = Transliterator::createInstance(id, direction, pe, ec);
   2347         }
   2348         UBool ok = (t != NULL && U_SUCCESS(ec));
   2349         UnicodeString transID;
   2350         if (t!=0) {
   2351             transID = t->getID();
   2352         }
   2353         else {
   2354             transID = UnicodeString("NULL", "");
   2355         }
   2356         if (ok == expOk) {
   2357             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
   2358                   u_errorName(ec));
   2359             if (source.length() != 0) {
   2360                 expect(*t, source, exp);
   2361             }
   2362             delete t;
   2363         } else {
   2364             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
   2365                   u_errorName(ec));
   2366         }
   2367     }
   2368 }
   2369 
   2370 /**
   2371  * Test new property set syntax
   2372  */
   2373 void TransliteratorTest::TestPropertySet() {
   2374     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
   2375     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
   2376            "[ a stitch ]\n[ in time ]\r[ saves 9]");
   2377 }
   2378 
   2379 /**
   2380  * Test various failure points of the new 2.0 engine.
   2381  */
   2382 void TransliteratorTest::TestNewEngine() {
   2383     UParseError pe;
   2384     UErrorCode ec = U_ZERO_ERROR;
   2385     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
   2386     if (t == 0 || U_FAILURE(ec)) {
   2387         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
   2388         return;
   2389     }
   2390     // Katakana should be untouched
   2391     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
   2392            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
   2393 
   2394     delete t;
   2395 
   2396 #if 1
   2397     // This test will only work if Transliterator.ROLLBACK is
   2398     // true.  Otherwise, this test will fail, revealing a
   2399     // limitation of global filters in incremental mode.
   2400     Transliterator *a =
   2401         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
   2402     Transliterator *A =
   2403         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
   2404     if (U_FAILURE(ec)) {
   2405         delete a;
   2406         delete A;
   2407         return;
   2408     }
   2409 
   2410     Transliterator* array[3];
   2411     array[0] = a;
   2412     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
   2413     array[2] = A;
   2414     if (U_FAILURE(ec)) {
   2415         errln("FAIL: createInstance NFD");
   2416         delete a;
   2417         delete A;
   2418         delete array[1];
   2419         return;
   2420     }
   2421 
   2422     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
   2423     if (U_FAILURE(ec)) {
   2424         errln("FAIL: UnicodeSet constructor");
   2425         delete a;
   2426         delete A;
   2427         delete array[1];
   2428         delete t;
   2429         return;
   2430     }
   2431 
   2432     expect(*t, "aAaA", "bAbA");
   2433 
   2434     assertTrue("countElements", t->countElements() == 3);
   2435     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
   2436     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
   2437     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
   2438     assertSuccess("getElement", ec);
   2439 
   2440     delete a;
   2441     delete A;
   2442     delete array[1];
   2443     delete t;
   2444 #endif
   2445 
   2446     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
   2447            "a",
   2448            "ax");
   2449 
   2450     UnicodeString gr = CharsToUnicodeString(
   2451         "$ddot = \\u0308 ;"
   2452         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
   2453         "$rough = \\u0314 ;"
   2454         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
   2455         "\\u03b1 <> a ;"
   2456         "$rough <> h ;");
   2457 
   2458     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
   2459 }
   2460 
   2461 /**
   2462  * Test quantified segment behavior.  We want:
   2463  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
   2464  */
   2465 void TransliteratorTest::TestQuantifiedSegment(void) {
   2466     // The normal case
   2467     expect("([abc]+) > x $1 x;", "cba", "xcbax");
   2468 
   2469     // The tricky case; the quantifier is around the segment
   2470     expect("([abc])+ > x $1 x;", "cba", "xax");
   2471 
   2472     // Tricky case in reverse direction
   2473     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
   2474 
   2475     // Check post-context segment
   2476     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
   2477 
   2478     // Test toRule/toPattern for non-quantified segment.
   2479     // Careful with spacing here.
   2480     UnicodeString r("([a-c]){q} > x $1 x;");
   2481     UParseError pe;
   2482     UErrorCode ec = U_ZERO_ERROR;
   2483     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2484     if (U_FAILURE(ec)) {
   2485         errln("FAIL: createFromRules");
   2486         delete t;
   2487         return;
   2488     }
   2489     UnicodeString rr;
   2490     t->toRules(rr, TRUE);
   2491     if (r != rr) {
   2492         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2493     } else {
   2494         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2495     }
   2496     delete t;
   2497 
   2498     // Test toRule/toPattern for quantified segment.
   2499     // Careful with spacing here.
   2500     r = "([a-c])+{q} > x $1 x;";
   2501     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
   2502     if (U_FAILURE(ec)) {
   2503         errln("FAIL: createFromRules");
   2504         delete t;
   2505         return;
   2506     }
   2507     t->toRules(rr, TRUE);
   2508     if (r != rr) {
   2509         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2510     } else {
   2511         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
   2512     }
   2513     delete t;
   2514 }
   2515 
   2516 //======================================================================
   2517 // Ram's tests
   2518 //======================================================================
   2519 void TransliteratorTest::TestDevanagariLatinRT(){
   2520     const int MAX_LEN= 52;
   2521     const char* const source[MAX_LEN] = {
   2522         "bh\\u0101rata",
   2523         "kra",
   2524         "k\\u1E63a",
   2525         "khra",
   2526         "gra",
   2527         "\\u1E45ra",
   2528         "cra",
   2529         "chra",
   2530         "j\\u00F1a",
   2531         "jhra",
   2532         "\\u00F1ra",
   2533         "\\u1E6Dya",
   2534         "\\u1E6Dhra",
   2535         "\\u1E0Dya",
   2536       //"r\\u0323ya", // \u095c is not valid in Devanagari
   2537         "\\u1E0Dhya",
   2538         "\\u1E5Bhra",
   2539         "\\u1E47ra",
   2540         "tta",
   2541         "thra",
   2542         "dda",
   2543         "dhra",
   2544         "nna",
   2545         "pra",
   2546         "phra",
   2547         "bra",
   2548         "bhra",
   2549         "mra",
   2550         "\\u1E49ra",
   2551       //"l\\u0331ra",
   2552         "yra",
   2553         "\\u1E8Fra",
   2554       //"l-",
   2555         "vra",
   2556         "\\u015Bra",
   2557         "\\u1E63ra",
   2558         "sra",
   2559         "hma",
   2560         "\\u1E6D\\u1E6Da",
   2561         "\\u1E6D\\u1E6Dha",
   2562         "\\u1E6Dh\\u1E6Dha",
   2563         "\\u1E0D\\u1E0Da",
   2564         "\\u1E0D\\u1E0Dha",
   2565         "\\u1E6Dya",
   2566         "\\u1E6Dhya",
   2567         "\\u1E0Dya",
   2568         "\\u1E0Dhya",
   2569         // Not roundtrippable --
   2570         // \\u0939\\u094d\\u094d\\u092E  - hma
   2571         // \\u0939\\u094d\\u092E         - hma
   2572         // CharsToUnicodeString("hma"),
   2573         "hya",
   2574         "\\u015Br\\u0325",
   2575         "\\u015Bca",
   2576         "\\u0115",
   2577         "san\\u0304j\\u012Bb s\\u0113nagupta",
   2578         "\\u0101nand vaddir\\u0101ju",
   2579         "\\u0101",
   2580         "a"
   2581     };
   2582     const char* const expected[MAX_LEN] = {
   2583         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
   2584         "\\u0915\\u094D\\u0930",          /* kra         */
   2585         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
   2586         "\\u0916\\u094D\\u0930",          /* khra        */
   2587         "\\u0917\\u094D\\u0930",          /* gra         */
   2588         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
   2589         "\\u091A\\u094D\\u0930",          /* cra         */
   2590         "\\u091B\\u094D\\u0930",          /* chra        */
   2591         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
   2592         "\\u091D\\u094D\\u0930",          /* jhra        */
   2593         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
   2594         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2595         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
   2596         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2597       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
   2598         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2599         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
   2600         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
   2601         "\\u0924\\u094D\\u0924",          /* tta         */
   2602         "\\u0925\\u094D\\u0930",          /* thra        */
   2603         "\\u0926\\u094D\\u0926",          /* dda         */
   2604         "\\u0927\\u094D\\u0930",          /* dhra        */
   2605         "\\u0928\\u094D\\u0928",          /* nna         */
   2606         "\\u092A\\u094D\\u0930",          /* pra         */
   2607         "\\u092B\\u094D\\u0930",          /* phra        */
   2608         "\\u092C\\u094D\\u0930",          /* bra         */
   2609         "\\u092D\\u094D\\u0930",          /* bhra        */
   2610         "\\u092E\\u094D\\u0930",          /* mra         */
   2611         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
   2612       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
   2613         "\\u092F\\u094D\\u0930",          /* yra         */
   2614         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
   2615       //"l-",
   2616         "\\u0935\\u094D\\u0930",          /* vra         */
   2617         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
   2618         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
   2619         "\\u0938\\u094D\\u0930",          /* sra         */
   2620         "\\u0939\\u094d\\u092E",          /* hma         */
   2621         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
   2622         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
   2623         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
   2624         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
   2625         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
   2626         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
   2627         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
   2628         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
   2629         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
   2630      // "hma",                         /* hma         */
   2631         "\\u0939\\u094D\\u092F",          /* hya         */
   2632         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
   2633         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
   2634         "\\u090d",                        /* e\\u0306    */
   2635         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
   2636         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
   2637         "\\u0906",
   2638         "\\u0905",
   2639     };
   2640     UErrorCode status = U_ZERO_ERROR;
   2641     UParseError parseError;
   2642     UnicodeString message;
   2643     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2644     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2645     if(U_FAILURE(status)){
   2646         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2647         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2648         return;
   2649     }
   2650     UnicodeString gotResult;
   2651     for(int i= 0; i<MAX_LEN; i++){
   2652         gotResult = source[i];
   2653         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2654         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2655     }
   2656     delete latinToDev;
   2657     delete devToLatin;
   2658 }
   2659 
   2660 void TransliteratorTest::TestTeluguLatinRT(){
   2661     const int MAX_LEN=10;
   2662     const char* const source[MAX_LEN] = {
   2663         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
   2664         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
   2665         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
   2666         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
   2667         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
   2668         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
   2669         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
   2670         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
   2671         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
   2672         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
   2673     };
   2674 
   2675     const char* const expected[MAX_LEN] = {
   2676         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2677         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
   2678         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2679         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
   2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
   2681         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
   2682         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
   2683         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
   2684         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2685         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
   2686     };
   2687 
   2688     UErrorCode status = U_ZERO_ERROR;
   2689     UParseError parseError;
   2690     UnicodeString message;
   2691     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
   2692     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2693     if(U_FAILURE(status)){
   2694         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2695         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2696         return;
   2697     }
   2698     UnicodeString gotResult;
   2699     for(int i= 0; i<MAX_LEN; i++){
   2700         gotResult = source[i];
   2701         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2702         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2703     }
   2704     delete latinToDev;
   2705     delete devToLatin;
   2706 }
   2707 
   2708 void TransliteratorTest::TestSanskritLatinRT(){
   2709     const int MAX_LEN =16;
   2710     const char* const source[MAX_LEN] = {
   2711         "rmk\\u1E63\\u0113t",
   2712         "\\u015Br\\u012Bmad",
   2713         "bhagavadg\\u012Bt\\u0101",
   2714         "adhy\\u0101ya",
   2715         "arjuna",
   2716         "vi\\u1E63\\u0101da",
   2717         "y\\u014Dga",
   2718         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2719         "uv\\u0101cr\\u0325",
   2720         "dharmak\\u1E63\\u0113tr\\u0113",
   2721         "kuruk\\u1E63\\u0113tr\\u0113",
   2722         "samav\\u0113t\\u0101",
   2723         "yuyutsava\\u1E25",
   2724         "m\\u0101mak\\u0101\\u1E25",
   2725     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2726         "kimakurvata",
   2727         "san\\u0304java",
   2728     };
   2729     const char* const expected[MAX_LEN] = {
   2730         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2731         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2732         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2733         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2734         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2735         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2736         "\\u092f\\u094b\\u0917",
   2737         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2738         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2739         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2740         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2741         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2742         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2743         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2744     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2745         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2746         "\\u0938\\u0902\\u091c\\u0935",
   2747     };
   2748     UErrorCode status = U_ZERO_ERROR;
   2749     UParseError parseError;
   2750     UnicodeString message;
   2751     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2752     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2753     if(U_FAILURE(status)){
   2754         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2755         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2756         return;
   2757     }
   2758     UnicodeString gotResult;
   2759     for(int i= 0; i<MAX_LEN; i++){
   2760         gotResult = source[i];
   2761         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
   2762         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
   2763     }
   2764     delete latinToDev;
   2765     delete devToLatin;
   2766 }
   2767 
   2768 
   2769 void TransliteratorTest::TestCompoundLatinRT(){
   2770     const char* const source[] = {
   2771         "rmk\\u1E63\\u0113t",
   2772         "\\u015Br\\u012Bmad",
   2773         "bhagavadg\\u012Bt\\u0101",
   2774         "adhy\\u0101ya",
   2775         "arjuna",
   2776         "vi\\u1E63\\u0101da",
   2777         "y\\u014Dga",
   2778         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
   2779         "uv\\u0101cr\\u0325",
   2780         "dharmak\\u1E63\\u0113tr\\u0113",
   2781         "kuruk\\u1E63\\u0113tr\\u0113",
   2782         "samav\\u0113t\\u0101",
   2783         "yuyutsava\\u1E25",
   2784         "m\\u0101mak\\u0101\\u1E25",
   2785      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
   2786         "kimakurvata",
   2787         "san\\u0304java"
   2788     };
   2789     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
   2790     const char* const expected[MAX_LEN] = {
   2791         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
   2792         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
   2793         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
   2794         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
   2795         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
   2796         "\\u0935\\u093f\\u0937\\u093e\\u0926",
   2797         "\\u092f\\u094b\\u0917",
   2798         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
   2799         "\\u0909\\u0935\\u093E\\u091A\\u0943",
   2800         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2801         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
   2802         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
   2803         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
   2804         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
   2805     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
   2806         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
   2807         "\\u0938\\u0902\\u091c\\u0935"
   2808     };
   2809     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
   2810         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
   2811         return;
   2812     }
   2813 
   2814     UErrorCode status = U_ZERO_ERROR;
   2815     UParseError parseError;
   2816     UnicodeString message;
   2817     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
   2818     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
   2819     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
   2820     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
   2821 
   2822     if(U_FAILURE(status)){
   2823         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
   2824         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
   2825         return;
   2826     }
   2827     UnicodeString gotResult;
   2828     for(int i= 0; i<MAX_LEN; i++){
   2829         gotResult = source[i];
   2830         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
   2831         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2832         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
   2833 
   2834     }
   2835     delete(latinToDevToLatin);
   2836     delete(devToLatinToDev);
   2837     delete(devToTelToDev);
   2838     delete(latinToTelToLatin);
   2839 }
   2840 
   2841 /**
   2842  * Test Gurmukhi-Devanagari Tippi and Bindi
   2843  */
   2844 void TransliteratorTest::TestGurmukhiDevanagari(){
   2845     // the rule says:
   2846     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
   2847     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
   2848     UErrorCode status = U_ZERO_ERROR;
   2849     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
   2850     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
   2851     UParseError parseError;
   2852 
   2853     UnicodeSetIterator vIter(vowel);
   2854     UnicodeSetIterator nvIter(non_vowel);
   2855     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
   2856     if(U_FAILURE(status)) {
   2857       dataerrln("Error creating transliterator %s", u_errorName(status));
   2858       delete trans;
   2859       return;
   2860     }
   2861     UnicodeString src (" \\u0902", -1, US_INV);
   2862     UnicodeString expected(" \\u0A02", -1, US_INV);
   2863     src = src.unescape();
   2864     expected= expected.unescape();
   2865 
   2866     while(vIter.next()){
   2867         src.setCharAt(0,(UChar) vIter.getCodepoint());
   2868         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
   2869         expect(*trans,src,expected);
   2870     }
   2871 
   2872     expected.setCharAt(1,0x0A70);
   2873     while(nvIter.next()){
   2874         //src.setCharAt(0,(char) nvIter.codepoint);
   2875         src.setCharAt(0,(UChar)nvIter.getCodepoint());
   2876         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
   2877         expect(*trans,src,expected);
   2878     }
   2879     delete trans;
   2880 }
   2881 /**
   2882  * Test instantiation from a locale.
   2883  */
   2884 void TransliteratorTest::TestLocaleInstantiation(void) {
   2885     UParseError pe;
   2886     UErrorCode ec = U_ZERO_ERROR;
   2887     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
   2888     if (U_FAILURE(ec)) {
   2889         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
   2890         delete t;
   2891         return;
   2892     }
   2893     expect(*t, CharsToUnicodeString("\\u0430"), "a");
   2894     delete t;
   2895 
   2896     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
   2897     if (U_FAILURE(ec)) {
   2898         errln("FAIL: createInstance(en-el)");
   2899         delete t;
   2900         return;
   2901     }
   2902     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
   2903     delete t;
   2904 }
   2905 
   2906 /**
   2907  * Test title case handling of accent (should ignore accents)
   2908  */
   2909 void TransliteratorTest::TestTitleAccents(void) {
   2910     UParseError pe;
   2911     UErrorCode ec = U_ZERO_ERROR;
   2912     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
   2913     if (U_FAILURE(ec)) {
   2914         errln("FAIL: createInstance(Title)");
   2915         delete t;
   2916         return;
   2917     }
   2918     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
   2919     delete t;
   2920 }
   2921 
   2922 /**
   2923  * Basic test of a locale resource based rule.
   2924  */
   2925 void TransliteratorTest::TestLocaleResource() {
   2926     const char* DATA[] = {
   2927         // id                    from               to
   2928         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
   2929         "Latin-el",              "b",               "\\u03bc\\u03c0",
   2930         "Latin-Greek",           "b",               "\\u03B2",
   2931         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
   2932         "el-Latin",              "\\u03B2",         "v",
   2933         "Greek-Latin",           "\\u03B2",         "b",
   2934     };
   2935     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   2936     for (int32_t i=0; i<DATA_length; i+=3) {
   2937         UParseError pe;
   2938         UErrorCode ec = U_ZERO_ERROR;
   2939         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
   2940         if (U_FAILURE(ec)) {
   2941             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
   2942             delete t;
   2943             continue;
   2944         }
   2945         expect(*t, CharsToUnicodeString(DATA[i+1]),
   2946                CharsToUnicodeString(DATA[i+2]));
   2947         delete t;
   2948     }
   2949 }
   2950 
   2951 /**
   2952  * Make sure parse errors reference the right line.
   2953  */
   2954 void TransliteratorTest::TestParseError() {
   2955     static const char* rule =
   2956         "a > b;\n"
   2957         "# more stuff\n"
   2958         "d << b;";
   2959     UErrorCode ec = U_ZERO_ERROR;
   2960     UParseError pe;
   2961     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   2962     delete t;
   2963     if (U_FAILURE(ec)) {
   2964         UnicodeString err(pe.preContext);
   2965         err.append((UChar)124/*|*/).append(pe.postContext);
   2966         if (err.indexOf("d << b") >= 0) {
   2967             logln("Ok: " + err);
   2968         } else {
   2969             errln("FAIL: " + err);
   2970         }
   2971     }
   2972     else {
   2973         errln("FAIL: no syntax error");
   2974     }
   2975     static const char* maskingRule =
   2976         "a>x;\n"
   2977         "# more stuff\n"
   2978         "ab>y;";
   2979     ec = U_ZERO_ERROR;
   2980     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
   2981     if (ec != U_RULE_MASK_ERROR) {
   2982         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
   2983     }
   2984     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
   2985         errln("FAIL: did not get expected precontext");
   2986     }
   2987     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
   2988         errln("FAIL: did not get expected postcontext");
   2989     }
   2990 }
   2991 
   2992 /**
   2993  * Make sure sets on output are disallowed.
   2994  */
   2995 void TransliteratorTest::TestOutputSet() {
   2996     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
   2997     UErrorCode ec = U_ZERO_ERROR;
   2998     UParseError pe;
   2999     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3000     delete t;
   3001     if (U_FAILURE(ec)) {
   3002         UnicodeString err(pe.preContext);
   3003         err.append((UChar)124/*|*/).append(pe.postContext);
   3004         logln("Ok: " + err);
   3005         return;
   3006     }
   3007     errln("FAIL: No syntax error");
   3008 }
   3009 
   3010 /**
   3011  * Test the use variable range pragma, making sure that use of
   3012  * variable range characters is detected and flagged as an error.
   3013  */
   3014 void TransliteratorTest::TestVariableRange() {
   3015     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
   3016     UErrorCode ec = U_ZERO_ERROR;
   3017     UParseError pe;
   3018     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3019     delete t;
   3020     if (U_FAILURE(ec)) {
   3021         UnicodeString err(pe.preContext);
   3022         err.append((UChar)124/*|*/).append(pe.postContext);
   3023         logln("Ok: " + err);
   3024         return;
   3025     }
   3026     errln("FAIL: No syntax error");
   3027 }
   3028 
   3029 /**
   3030  * Test invalid post context error handling
   3031  */
   3032 void TransliteratorTest::TestInvalidPostContext() {
   3033     UnicodeString rule = "a}b{c>d;";
   3034     UErrorCode ec = U_ZERO_ERROR;
   3035     UParseError pe;
   3036     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
   3037     delete t;
   3038     if (U_FAILURE(ec)) {
   3039         UnicodeString err(pe.preContext);
   3040         err.append((UChar)124/*|*/).append(pe.postContext);
   3041         if (err.indexOf("a}b{c") >= 0) {
   3042             logln("Ok: " + err);
   3043         } else {
   3044             errln("FAIL: " + err);
   3045         }
   3046         return;
   3047     }
   3048     errln("FAIL: No syntax error");
   3049 }
   3050 
   3051 /**
   3052  * Test ID form variants
   3053  */
   3054 void TransliteratorTest::TestIDForms() {
   3055     const char* DATA[] = {
   3056         "NFC", NULL, "NFD",
   3057         "nfd", NULL, "NFC", // make sure case is ignored
   3058         "Any-NFKD", NULL, "Any-NFKC",
   3059         "Null", NULL, "Null",
   3060         "-nfkc", "nfkc", "NFKD",
   3061         "-nfkc/", "nfkc", "NFKD",
   3062         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
   3063         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
   3064         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
   3065         "Source-", NULL, NULL,
   3066         "Source/Variant-", NULL, NULL,
   3067         "Source-/Variant", NULL, NULL,
   3068         "/Variant", NULL, NULL,
   3069         "/Variant-", NULL, NULL,
   3070         "-/Variant", NULL, NULL,
   3071         "-/", NULL, NULL,
   3072         "-", NULL, NULL,
   3073         "/", NULL, NULL,
   3074     };
   3075     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
   3076 
   3077     for (int32_t i=0; i<DATA_length; i+=3) {
   3078         const char* ID = DATA[i];
   3079         const char* expID = DATA[i+1];
   3080         const char* expInvID = DATA[i+2];
   3081         UBool expValid = (expInvID != NULL);
   3082         if (expID == NULL) {
   3083             expID = ID;
   3084         }
   3085         UParseError pe;
   3086         UErrorCode ec = U_ZERO_ERROR;
   3087         Transliterator *t =
   3088             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
   3089         if (U_FAILURE(ec)) {
   3090             if (!expValid) {
   3091                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
   3092             } else {
   3093                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
   3094             }
   3095             delete t;
   3096             continue;
   3097         }
   3098         Transliterator *u = t->createInverse(ec);
   3099         if (U_FAILURE(ec)) {
   3100             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
   3101             delete t;
   3102             delete u;
   3103             continue;
   3104         }
   3105         if (t->getID() == expID &&
   3106             u->getID() == expInvID) {
   3107             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
   3108         } else {
   3109             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
   3110                   t->getID() + " x getInverse() => " + u->getID() +
   3111                   ", expected " + expInvID);
   3112         }
   3113         delete t;
   3114         delete u;
   3115     }
   3116 }
   3117 
   3118 static const UChar SPACE[]   = {32,0};
   3119 static const UChar NEWLINE[] = {10,0};
   3120 static const UChar RETURN[]  = {13,0};
   3121 static const UChar EMPTY[]   = {0};
   3122 
   3123 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
   3124                                     const UnicodeString& testRulesForward) {
   3125     UnicodeString rules2; t2.toRules(rules2, TRUE);
   3126     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
   3127     rules2.findAndReplace(SPACE, EMPTY);
   3128     rules2.findAndReplace(NEWLINE, EMPTY);
   3129     rules2.findAndReplace(RETURN, EMPTY);
   3130 
   3131     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
   3132 
   3133     if (rules2 != testRules) {
   3134         errln(label);
   3135         logln((UnicodeString)"GENERATED RULES: " + rules2);
   3136         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
   3137     }
   3138 }
   3139 
   3140 /**
   3141  * Mark's toRules test.
   3142  */
   3143 void TransliteratorTest::TestToRulesMark() {
   3144     const char* testRules =
   3145         "::[[:Latin:][:Mark:]];"
   3146         "::NFKD (NFC);"
   3147         "::Lower (Lower);"
   3148         "a <> \\u03B1;" // alpha
   3149         "::NFKC (NFD);"
   3150         "::Upper (Lower);"
   3151         "::Lower ();"
   3152         "::([[:Greek:][:Mark:]]);"
   3153         ;
   3154     const char* testRulesForward =
   3155         "::[[:Latin:][:Mark:]];"
   3156         "::NFKD(NFC);"
   3157         "::Lower(Lower);"
   3158         "a > \\u03B1;"
   3159         "::NFKC(NFD);"
   3160         "::Upper (Lower);"
   3161         "::Lower ();"
   3162         ;
   3163     const char* testRulesBackward =
   3164         "::[[:Greek:][:Mark:]];"
   3165         "::Lower (Upper);"
   3166         "::NFD(NFKC);"
   3167         "\\u03B1 > a;"
   3168         "::Lower(Lower);"
   3169         "::NFC(NFKD);"
   3170         ;
   3171     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
   3172     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
   3173 
   3174     UParseError pe;
   3175     UErrorCode ec = U_ZERO_ERROR;
   3176     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
   3177     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
   3178 
   3179     if (U_FAILURE(ec)) {
   3180         delete t2;
   3181         delete t3;
   3182         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
   3183         return;
   3184     }
   3185 
   3186     expect(*t2, source, target);
   3187     expect(*t3, target, source);
   3188 
   3189     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
   3190     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
   3191 
   3192     delete t2;
   3193     delete t3;
   3194 }
   3195 
   3196 /**
   3197  * Test Escape and Unescape transliterators.
   3198  */
   3199 void TransliteratorTest::TestEscape() {
   3200     UParseError pe;
   3201     UErrorCode ec;
   3202     Transliterator *t;
   3203 
   3204     ec = U_ZERO_ERROR;
   3205     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
   3206     if (U_FAILURE(ec)) {
   3207         errln((UnicodeString)"FAIL: createInstance");
   3208     } else {
   3209         expect(*t,
   3210                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
   3211                "@12Q");
   3212     }
   3213     delete t;
   3214 
   3215     ec = U_ZERO_ERROR;
   3216     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
   3217     if (U_FAILURE(ec)) {
   3218         errln((UnicodeString)"FAIL: createInstance");
   3219     } else {
   3220         expect(*t,
   3221                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3222                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
   3223     }
   3224     delete t;
   3225 
   3226     ec = U_ZERO_ERROR;
   3227     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
   3228     if (U_FAILURE(ec)) {
   3229         errln((UnicodeString)"FAIL: createInstance");
   3230     } else {
   3231         expect(*t,
   3232                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3233                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
   3234     }
   3235     delete t;
   3236 
   3237     ec = U_ZERO_ERROR;
   3238     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
   3239     if (U_FAILURE(ec)) {
   3240         errln((UnicodeString)"FAIL: createInstance");
   3241     } else {
   3242         expect(*t,
   3243                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
   3244                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
   3245     }
   3246     delete t;
   3247 }
   3248 
   3249 
   3250 void TransliteratorTest::TestAnchorMasking(){
   3251     UnicodeString rule ("^a > Q; a > q;");
   3252     UErrorCode status= U_ZERO_ERROR;
   3253     UParseError parseError;
   3254 
   3255     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
   3256     if(U_FAILURE(status)){
   3257         errln(UnicodeString("FAIL: ") + "ID" +
   3258               ".createFromRules() => bad rules" +
   3259               /*", parse error " + parseError.code +*/
   3260               ", line " + parseError.line +
   3261               ", offset " + parseError.offset +
   3262               ", context " + prettify(parseError.preContext, TRUE) +
   3263               ", rules: " + prettify(rule, TRUE));
   3264     }
   3265     delete t;
   3266 }
   3267 
   3268 /**
   3269  * Make sure display names of variants look reasonable.
   3270  */
   3271 void TransliteratorTest::TestDisplayName() {
   3272 #if UCONFIG_NO_FORMATTING
   3273     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
   3274     return;
   3275 #else
   3276     static const char* DATA[] = {
   3277         // ID, forward name, reverse name
   3278         // Update the text as necessary -- the important thing is
   3279         // not the text itself, but how various cases are handled.
   3280 
   3281         // Basic test
   3282         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
   3283 
   3284         // Variants
   3285         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
   3286 
   3287         // Target-only IDs
   3288         "NFC", "Any to NFC", "Any to NFD",
   3289     };
   3290 
   3291     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
   3292 
   3293     Locale US("en", "US");
   3294 
   3295     for (int32_t i=0; i<DATA_length; i+=3) {
   3296         UnicodeString name;
   3297         Transliterator::getDisplayName(DATA[i], US, name);
   3298         if (name != DATA[i+1]) {
   3299             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
   3300                   name + ", expected " + DATA[i+1]);
   3301         } else {
   3302             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
   3303         }
   3304         UErrorCode ec = U_ZERO_ERROR;
   3305         UParseError pe;
   3306         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
   3307         if (U_FAILURE(ec)) {
   3308             delete t;
   3309             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
   3310             continue;
   3311         }
   3312         name = Transliterator::getDisplayName(t->getID(), US, name);
   3313         if (name != DATA[i+2]) {
   3314             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
   3315                   name + ", expected " + DATA[i+2]);
   3316         } else {
   3317             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
   3318         }
   3319         delete t;
   3320     }
   3321 #endif
   3322 }
   3323 
   3324 void TransliteratorTest::TestSpecialCases(void) {
   3325     const UnicodeString registerRules[] = {
   3326         "Any-Dev1", "x > X; y > Y;",
   3327         "Any-Dev2", "XY > Z",
   3328         "Greek-Latin/FAKE",
   3329             CharsToUnicodeString
   3330             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
   3331         "" // END MARKER
   3332     };
   3333 
   3334     const UnicodeString testCases[] = {
   3335         // NORMALIZATION
   3336         // should add more test cases
   3337         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3338         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3339         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3340         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
   3341 
   3342         // mp -> b BUG
   3343         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3344         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
   3345 
   3346         // check for devanagari bug
   3347         "nfd;Dev1;Dev2;nfc", "xy", "Z",
   3348 
   3349         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
   3350         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3351                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3352 
   3353         //TODO: enable this test once Titlecase works right
   3354         /*
   3355         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3356                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
   3357                  */
   3358         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3359                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
   3360         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
   3361                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
   3362 
   3363         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3364         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
   3365 
   3366          // FORMS OF S
   3367         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3368                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3369         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3370                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
   3371         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3372                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
   3373         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
   3374                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
   3375         // Tatiana bug
   3376         // Upper: TAT\\u02B9\\u00C2NA
   3377         // Lower: tat\\u02B9\\u00E2na
   3378         // Title: Tat\\u02B9\\u00E2na
   3379         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3380                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3381         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
   3382                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3383         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
   3384                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
   3385 
   3386         "" // END MARKER
   3387     };
   3388 
   3389     UParseError pos;
   3390     int32_t i;
   3391     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3392         UErrorCode status = U_ZERO_ERROR;
   3393 
   3394         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
   3395             registerRules[i+1], UTRANS_FORWARD, pos, status);
   3396         if (U_FAILURE(status)) {
   3397             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
   3398         } else {
   3399             Transliterator::registerInstance(t);
   3400         }
   3401     }
   3402     for (i = 0; testCases[i].length()!=0; i+=3) {
   3403         UErrorCode ec = U_ZERO_ERROR;
   3404         UParseError pe;
   3405         const UnicodeString& name = testCases[i];
   3406         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
   3407         if (U_FAILURE(ec)) {
   3408             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
   3409             delete t;
   3410             continue;
   3411         }
   3412         const UnicodeString& id = t->getID();
   3413         const UnicodeString& source = testCases[i+1];
   3414         UnicodeString target;
   3415 
   3416         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
   3417 
   3418         if (testCases[i+2].length() > 0) {
   3419             target = testCases[i+2];
   3420         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
   3421             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
   3422         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
   3423             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
   3424         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
   3425             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
   3426         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
   3427             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
   3428         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
   3429             target = source;
   3430             target.toLower(Locale::getUS());
   3431         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
   3432             target = source;
   3433             target.toUpper(Locale::getUS());
   3434         }
   3435         if (U_FAILURE(ec)) {
   3436             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
   3437             continue;
   3438         }
   3439 
   3440         expect(*t, source, target);
   3441         delete t;
   3442     }
   3443     for (i = 0; registerRules[i].length()!=0; i+=2) {
   3444         Transliterator::unregister(registerRules[i]);
   3445     }
   3446 }
   3447 
   3448 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
   3449     if (ch <= 0xFFFF) {
   3450         sprintf(buffer, "\\u%04x", (int)ch);
   3451     } else {
   3452         sprintf(buffer, "\\U%08x", (int)ch);
   3453     }
   3454     return buffer;
   3455 }
   3456 
   3457 void TransliteratorTest::TestSurrogateCasing (void) {
   3458     // check that casing handles surrogates
   3459     // titlecase is currently defective
   3460     char buffer[20];
   3461     UChar buffer2[20];
   3462     UChar32 dee;
   3463     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
   3464     UnicodeString DEE(u_totitle(dee));
   3465     if (DEE != DESERET_DEE) {
   3466         err("Fails titlecase of surrogates");
   3467         err(Char32ToEscapedChars(dee, buffer));
   3468         err(", ");
   3469         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
   3470     }
   3471 
   3472     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
   3473     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
   3474     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
   3475     UErrorCode status= U_ZERO_ERROR;
   3476 
   3477     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
   3479         errln("Fails: Can't uppercase surrogates.");
   3480     }
   3481 
   3482     status= U_ZERO_ERROR;
   3483     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
   3484     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
   3485         errln("Fails: Can't lowercase surrogates.");
   3486     }
   3487 }
   3488 
   3489 static void _trans(Transliterator& t, const UnicodeString& src,
   3490                    UnicodeString& result) {
   3491     result = src;
   3492     t.transliterate(result);
   3493 }
   3494 
   3495 static void _trans(const UnicodeString& id, const UnicodeString& src,
   3496                    UnicodeString& result, UErrorCode ec) {
   3497     UParseError pe;
   3498     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   3499     if (U_SUCCESS(ec)) {
   3500         _trans(*t, src, result);
   3501     }
   3502     delete t;
   3503 }
   3504 
   3505 static UnicodeString _findMatch(const UnicodeString& source,
   3506                                        const UnicodeString* pairs) {
   3507     UnicodeString empty;
   3508     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
   3509         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
   3510             return pairs[i+1];
   3511         }
   3512     }
   3513     return empty;
   3514 }
   3515 
   3516 // Check to see that incremental gets at least part way through a reasonable string.
   3517 
   3518 void TransliteratorTest::TestIncrementalProgress(void) {
   3519     UErrorCode ec = U_ZERO_ERROR;
   3520     UnicodeString latinTest = "The Quick Brown Fox.";
   3521     UnicodeString devaTest;
   3522     _trans("Latin-Devanagari", latinTest, devaTest, ec);
   3523     UnicodeString kataTest;
   3524     _trans("Latin-Katakana", latinTest, kataTest, ec);
   3525     if (U_FAILURE(ec)) {
   3526         errln("FAIL: Internal error");
   3527         return;
   3528     }
   3529     const UnicodeString tests[] = {
   3530         "Any", latinTest,
   3531         "Latin", latinTest,
   3532         "Halfwidth", latinTest,
   3533         "Devanagari", devaTest,
   3534         "Katakana", kataTest,
   3535         "" // END MARKER
   3536     };
   3537 
   3538     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
   3539     int32_t i = 0, j=0, k=0;
   3540     int32_t sources = Transliterator::countAvailableSources();
   3541     for (i = 0; i < sources; i++) {
   3542         UnicodeString source;
   3543         Transliterator::getAvailableSource(i, source);
   3544         UnicodeString test = _findMatch(source, tests);
   3545         if (test.length() == 0) {
   3546             logln((UnicodeString)"Skipping " + source + "-X");
   3547             continue;
   3548         }
   3549         int32_t targets = Transliterator::countAvailableTargets(source);
   3550         for (j = 0; j < targets; j++) {
   3551             UnicodeString target;
   3552             Transliterator::getAvailableTarget(j, source, target);
   3553             int32_t variants = Transliterator::countAvailableVariants(source, target);
   3554             for (k =0; k< variants; k++) {
   3555                 UnicodeString variant;
   3556                 UParseError err;
   3557                 UErrorCode status = U_ZERO_ERROR;
   3558 
   3559                 Transliterator::getAvailableVariant(k, source, target, variant);
   3560                 UnicodeString id = source + "-" + target + "/" + variant;
   3561 
   3562                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
   3563                 if (U_FAILURE(status)) {
   3564                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
   3565                     delete t;
   3566                     continue;
   3567                 }
   3568                 status = U_ZERO_ERROR;
   3569                 CheckIncrementalAux(t, test);
   3570 
   3571                 UnicodeString rev;
   3572                 _trans(*t, test, rev);
   3573                 Transliterator *inv = t->createInverse(status);
   3574                 if (U_FAILURE(status)) {
   3575 #if UCONFIG_NO_BREAK_ITERATION
   3576                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
   3577                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
   3578 #endif
   3579                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
   3580 
   3581                     delete t;
   3582                     delete inv;
   3583                     continue;
   3584                 }
   3585                 CheckIncrementalAux(inv, rev);
   3586                 delete t;
   3587                 delete inv;
   3588             }
   3589         }
   3590     }
   3591 }
   3592 
   3593 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
   3594                                                       const UnicodeString& input) {
   3595     UErrorCode ec = U_ZERO_ERROR;
   3596     UTransPosition pos;
   3597     UnicodeString test = input;
   3598 
   3599     pos.contextStart = 0;
   3600     pos.contextLimit = input.length();
   3601     pos.start = 0;
   3602     pos.limit = input.length();
   3603 
   3604     t->transliterate(test, pos, ec);
   3605     if (U_FAILURE(ec)) {
   3606         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
   3607         return;
   3608     }
   3609     UBool gotError = FALSE;
   3610     (void)gotError;    // Suppress set but not used warning.
   3611 
   3612     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
   3613 
   3614     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
   3615         errln((UnicodeString)"No Progress, " +
   3616               t->getID() + ": " + formatInput(test, input, pos));
   3617         gotError = TRUE;
   3618     } else {
   3619         logln((UnicodeString)"PASS Progress, " +
   3620               t->getID() + ": " + formatInput(test, input, pos));
   3621     }
   3622     t->finishTransliteration(test, pos);
   3623     if (pos.start != pos.limit) {
   3624         errln((UnicodeString)"Incomplete, " +
   3625               t->getID() + ": " + formatInput(test, input, pos));
   3626         gotError = TRUE;
   3627     }
   3628 }
   3629 
   3630 void TransliteratorTest::TestFunction() {
   3631     // Careful with spacing and ';' here:  Phrase this exactly
   3632     // as toRules() is going to return it.  If toRules() changes
   3633     // with regard to spacing or ';', then adjust this string.
   3634     UnicodeString rule =
   3635         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
   3636 
   3637     UParseError pe;
   3638     UErrorCode ec = U_ZERO_ERROR;
   3639     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3640     if (t == NULL) {
   3641         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
   3642         return;
   3643     }
   3644 
   3645     UnicodeString r;
   3646     t->toRules(r, TRUE);
   3647     if (r == rule) {
   3648         logln((UnicodeString)"OK: toRules() => " + r);
   3649     } else {
   3650         errln((UnicodeString)"FAIL: toRules() => " + r +
   3651               ", expected " + rule);
   3652     }
   3653 
   3654     expect(*t, "The Quick Brown Fox",
   3655            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
   3656 
   3657     delete t;
   3658 }
   3659 
   3660 void TransliteratorTest::TestInvalidBackRef(void) {
   3661     UnicodeString rule =  ". > $1;";
   3662     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
   3663     UParseError pe;
   3664     UErrorCode ec = U_ZERO_ERROR;
   3665     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3666     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
   3667 
   3668     if (t != NULL) {
   3669         errln("FAIL: createFromRules should have returned NULL");
   3670         delete t;
   3671     }
   3672 
   3673     if (t2 != NULL) {
   3674         errln("FAIL: createFromRules should have returned NULL");
   3675         delete t2;
   3676     }
   3677 
   3678     if (U_SUCCESS(ec)) {
   3679         errln("FAIL: Ok: . > $1; => no error");
   3680     } else {
   3681         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
   3682     }
   3683 }
   3684 
   3685 void TransliteratorTest::TestMulticharStringSet() {
   3686     // Basic testing
   3687     const char* rule =
   3688         "       [{aa}]       > x;"
   3689         "         a          > y;"
   3690         "       [b{bc}]      > z;"
   3691         "[{gd}] { e          > q;"
   3692         "         e } [{fg}] > r;" ;
   3693 
   3694     UParseError pe;
   3695     UErrorCode ec = U_ZERO_ERROR;
   3696     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3697     if (t == NULL || U_FAILURE(ec)) {
   3698         delete t;
   3699         errln("FAIL: createFromRules failed");
   3700         return;
   3701     }
   3702 
   3703     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
   3704            "y x yz z d gd de gdq gdqfg ddrfg");
   3705     delete t;
   3706 
   3707     // Overlapped string test.  Make sure that when multiple
   3708     // strings can match that the longest one is matched.
   3709     rule =
   3710         "    [a {ab} {abc}]    > x;"
   3711         "           b          > y;"
   3712         "           c          > z;"
   3713         " q [t {st} {rst}] { e > p;" ;
   3714 
   3715     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
   3716     if (t == NULL || U_FAILURE(ec)) {
   3717         delete t;
   3718         errln("FAIL: createFromRules failed");
   3719         return;
   3720     }
   3721 
   3722     expect(*t, "a ab abc qte qste qrste",
   3723            "x x x qtp qstp qrstp");
   3724     delete t;
   3725 }
   3726 
   3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   3728 // BEGIN TestUserFunction support factory
   3729 
   3730 Transliterator* _TUFF[4];
   3731 UnicodeString* _TUFID[4];
   3732 
   3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
   3734                                    Transliterator::Token context) {
   3735     return _TUFF[context.integer]->clone();
   3736 }
   3737 
   3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
   3739     _TUFF[n] = t;
   3740     _TUFID[n] = new UnicodeString(ID);
   3741     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
   3742 }
   3743 
   3744 static void _TUFUnreg(int32_t n) {
   3745     if (_TUFF[n] != NULL) {
   3746         Transliterator::unregister(*_TUFID[n]);
   3747         delete _TUFF[n];
   3748         delete _TUFID[n];
   3749     }
   3750 }
   3751 
   3752 // END TestUserFunction support factory
   3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   3754 
   3755 /**
   3756  * Test that user-registered transliterators can be used under function
   3757  * syntax.
   3758  */
   3759 void TransliteratorTest::TestUserFunction() {
   3760 
   3761     Transliterator* t;
   3762     UParseError pe;
   3763     UErrorCode ec = U_ZERO_ERROR;
   3764 
   3765     // Setup our factory
   3766     int32_t i;
   3767     for (i=0; i<4; ++i) {
   3768         _TUFF[i] = NULL;
   3769     }
   3770 
   3771     // There's no need to register inverses if we don't use them
   3772     t = Transliterator::createFromRules("gif",
   3773                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
   3774                                         UTRANS_FORWARD, pe, ec);
   3775     if (t == NULL || U_FAILURE(ec)) {
   3776         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
   3777         return;
   3778     }
   3779     _TUFReg("Any-gif", t, 0);
   3780 
   3781     t = Transliterator::createFromRules("RemoveCurly",
   3782                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
   3783                                         UTRANS_FORWARD, pe, ec);
   3784     if (t == NULL || U_FAILURE(ec)) {
   3785         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
   3786         goto FAIL;
   3787     }
   3788     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
   3789     _TUFReg("Any-RemoveCurly", t, 1);
   3790 
   3791     logln("Trying &hex");
   3792     t = Transliterator::createFromRules("hex2",
   3793                                         "(.) > &hex($1);",
   3794                                         UTRANS_FORWARD, pe, ec);
   3795     if (t == NULL || U_FAILURE(ec)) {
   3796         errln("FAIL: createFromRules");
   3797         goto FAIL;
   3798     }
   3799     logln("Registering");
   3800     _TUFReg("Any-hex2", t, 2);
   3801     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
   3802     if (t == NULL || U_FAILURE(ec)) {
   3803         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
   3804         goto FAIL;
   3805     }
   3806     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
   3807     delete t;
   3808 
   3809     logln("Trying &gif");
   3810     t = Transliterator::createFromRules("gif2",
   3811                                         "(.) > &Gif(&Hex2($1));",
   3812                                         UTRANS_FORWARD, pe, ec);
   3813     if (t == NULL || U_FAILURE(ec)) {
   3814         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
   3815         goto FAIL;
   3816     }
   3817     logln("Registering");
   3818     _TUFReg("Any-gif2", t, 3);
   3819     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
   3820     if (t == NULL || U_FAILURE(ec)) {
   3821         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
   3822         goto FAIL;
   3823     }
   3824     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
   3825            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
   3826     delete t;
   3827 
   3828     // Test that filters are allowed after &
   3829     t = Transliterator::createFromRules("test",
   3830                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
   3831                                         UTRANS_FORWARD, pe, ec);
   3832     if (t == NULL || U_FAILURE(ec)) {
   3833         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
   3834         goto FAIL;
   3835     }
   3836     expect(*t, "abc",
   3837            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
   3838     delete t;
   3839 
   3840  FAIL:
   3841     for (i=0; i<4; ++i) {
   3842         _TUFUnreg(i);
   3843     }
   3844 }
   3845 
   3846 /**
   3847  * Test the Any-X transliterators.
   3848  */
   3849 void TransliteratorTest::TestAnyX(void) {
   3850     UParseError parseError;
   3851     UErrorCode status = U_ZERO_ERROR;
   3852     Transliterator* anyLatin =
   3853         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3854     if (anyLatin==0) {
   3855         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
   3856         delete anyLatin;
   3857         return;
   3858     }
   3859 
   3860     expect(*anyLatin,
   3861            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
   3862            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
   3863 
   3864     delete anyLatin;
   3865 }
   3866 
   3867 /**
   3868  * Test Any-X transliterators with sample letters from all scripts.
   3869  */
   3870 void TransliteratorTest::TestAny(void) {
   3871     UErrorCode status = U_ZERO_ERROR;
   3872     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
   3873     //       function call parameters going on in this test.
   3874     UnicodeSet alphabetic("[:alphabetic:]", status);
   3875     if (U_FAILURE(status)) {
   3876         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3877         return;
   3878     }
   3879     alphabetic.freeze();
   3880 
   3881     UnicodeString testString;
   3882     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
   3883         const char *scriptName = uscript_getShortName((UScriptCode)i);
   3884         if (scriptName == NULL) {
   3885             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
   3886             return;
   3887         }
   3888 
   3889         UnicodeSet sample;
   3890         sample.applyPropertyAlias("script", scriptName, status);
   3891         if (U_FAILURE(status)) {
   3892             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3893             return;
   3894         }
   3895         sample.retainAll(alphabetic);
   3896         for (int32_t count=0; count<5; count++) {
   3897             UChar32 c = sample.charAt(count);
   3898             if (c == -1) {
   3899                 break;
   3900             }
   3901             testString.append(c);
   3902         }
   3903     }
   3904 
   3905     UParseError parseError;
   3906     Transliterator* anyLatin =
   3907         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   3908     if (U_FAILURE(status)) {
   3909         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
   3910         return;
   3911     }
   3912 
   3913     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
   3914     anyLatin->transliterate(testString);
   3915     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
   3916     delete anyLatin;
   3917 }
   3918 
   3919 
   3920 /**
   3921  * Test the source and target set API.  These are only implemented
   3922  * for RBT and CompoundTransliterator at this time.
   3923  */
   3924 void TransliteratorTest::TestSourceTargetSet() {
   3925     UErrorCode ec = U_ZERO_ERROR;
   3926 
   3927     // Rules
   3928     const char* r =
   3929         "a > b; "
   3930         "r [x{lu}] > q;";
   3931 
   3932     // Expected source
   3933     UnicodeSet expSrc("[arx{lu}]", ec);
   3934 
   3935     // Expected target
   3936     UnicodeSet expTrg("[bq]", ec);
   3937 
   3938     UParseError pe;
   3939     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
   3940 
   3941     if (U_FAILURE(ec)) {
   3942         delete t;
   3943         errln("FAIL: Couldn't set up test");
   3944         return;
   3945     }
   3946 
   3947     UnicodeSet src; t->getSourceSet(src);
   3948     UnicodeSet trg; t->getTargetSet(trg);
   3949 
   3950     if (src == expSrc && trg == expTrg) {
   3951         UnicodeString a, b;
   3952         logln((UnicodeString)"Ok: " +
   3953               r + " => source = " + src.toPattern(a, TRUE) +
   3954               ", target = " + trg.toPattern(b, TRUE));
   3955     } else {
   3956         UnicodeString a, b, c, d;
   3957         errln((UnicodeString)"FAIL: " +
   3958               r + " => source = " + src.toPattern(a, TRUE) +
   3959               ", expected " + expSrc.toPattern(b, TRUE) +
   3960               "; target = " + trg.toPattern(c, TRUE) +
   3961               ", expected " + expTrg.toPattern(d, TRUE));
   3962     }
   3963 
   3964     delete t;
   3965 }
   3966 
   3967 /**
   3968  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
   3969  */
   3970 void TransliteratorTest::TestPatternWhiteSpace() {
   3971     // Rules
   3972     const char* r = "a > \\u200E b;";
   3973 
   3974     UErrorCode ec = U_ZERO_ERROR;
   3975     UParseError pe;
   3976     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
   3977 
   3978     if (U_FAILURE(ec)) {
   3979         errln("FAIL: Couldn't set up test");
   3980     } else {
   3981         expect(*t, "a", "b");
   3982     }
   3983     delete t;
   3984 
   3985     // UnicodeSet
   3986     ec = U_ZERO_ERROR;
   3987     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
   3988 
   3989     if (U_FAILURE(ec)) {
   3990         errln("FAIL: Couldn't set up test");
   3991     } else {
   3992         if (set.contains(0x200E)) {
   3993             errln("FAIL: U+200E not being ignored by UnicodeSet");
   3994         }
   3995     }
   3996 }
   3997 //======================================================================
   3998 // this method is in TestUScript.java
   3999 //======================================================================
   4000 void TransliteratorTest::TestAllCodepoints(){
   4001     UScriptCode code= USCRIPT_INVALID_CODE;
   4002     char id[256]={'\0'};
   4003     char abbr[256]={'\0'};
   4004     char newId[256]={'\0'};
   4005     char newAbbrId[256]={'\0'};
   4006     char oldId[256]={'\0'};
   4007     char oldAbbrId[256]={'\0'};
   4008 
   4009     UErrorCode status =U_ZERO_ERROR;
   4010     UParseError pe;
   4011 
   4012     for(uint32_t i = 0; i<=0x10ffff; i++){
   4013         code =  uscript_getScript(i,&status);
   4014         if(code == USCRIPT_INVALID_CODE){
   4015             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
   4016         }
   4017         const char* myId = uscript_getName(code);
   4018         if(!myId) {
   4019           dataerrln("Valid script code returned NULL name. Check your data!");
   4020           return;
   4021         }
   4022         uprv_strcpy(id,myId);
   4023         uprv_strcpy(abbr,uscript_getShortName(code));
   4024 
   4025         uprv_strcpy(newId,"[:");
   4026         uprv_strcat(newId,id);
   4027         uprv_strcat(newId,":];NFD");
   4028 
   4029         uprv_strcpy(newAbbrId,"[:");
   4030         uprv_strcat(newAbbrId,abbr);
   4031         uprv_strcat(newAbbrId,":];NFD");
   4032 
   4033         if(uprv_strcmp(newId,oldId)!=0){
   4034             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
   4035             if(t==NULL || U_FAILURE(status)){
   4036                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4037             }
   4038             delete t;
   4039         }
   4040         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
   4041             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
   4042             if(t==NULL || U_FAILURE(status)){
   4043                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
   4044             }
   4045             delete t;
   4046         }
   4047         uprv_strcpy(oldId,newId);
   4048         uprv_strcpy(oldAbbrId, newAbbrId);
   4049 
   4050     }
   4051 
   4052 }
   4053 
   4054 #define TEST_TRANSLIT_ID(id, cls) { \
   4055   UErrorCode ec = U_ZERO_ERROR; \
   4056   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
   4057   if (U_FAILURE(ec)) { \
   4058     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
   4059   } else { \
   4060     if (t->getDynamicClassID() != cls::getStaticClassID()) { \
   4061       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4062     } \
   4063     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4064   } \
   4065   delete t; \
   4066 }
   4067 
   4068 #define TEST_TRANSLIT_RULE(rule, cls) { \
   4069   UErrorCode ec = U_ZERO_ERROR; \
   4070   UParseError pe; \
   4071   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
   4072   if (U_FAILURE(ec)) { \
   4073     errln("FAIL: Couldn't create " rule); \
   4074   } else { \
   4075     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
   4076       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
   4077     } \
   4078     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
   4079   } \
   4080   delete t; \
   4081 }
   4082 
   4083 void TransliteratorTest::TestBoilerplate() {
   4084     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
   4085     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
   4086     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
   4087     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
   4088     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
   4089     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
   4090     TEST_TRANSLIT_ID("Null", NullTransliterator);
   4091     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
   4092     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
   4093     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
   4094     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
   4095     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
   4096     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
   4097 }
   4098 
   4099 void TransliteratorTest::TestAlternateSyntax() {
   4100     // U+2206 == &
   4101     // U+2190 == <
   4102     // U+2192 == >
   4103     // U+2194 == <>
   4104     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
   4105            "abc",
   4106            "xbz");
   4107     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
   4108            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
   4109            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
   4110 }
   4111 
   4112 static const char* BEGIN_END_RULES[] = {
            // Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
            // Entries are picked by index from BEGIN_END_TEST_CASES, and
            // entry [17] is additionally instantiated in the reverse
            // direction by both tests.
            //
            // Every entry that depended on ::BEGIN/::END syntax is commented
            // out and replaced by an empty-string placeholder so that the
            // indices of the remaining entries do not shift.
   4113     // [0]
   4114     "abc > xy;"
   4115     "aba > z;",
   4116 
   4117     // [1]
   4118 /*
   4119     "::BEGIN;"
   4120     "abc > xy;"
   4121     "::END;"
   4122     "::BEGIN;"
   4123     "aba > z;"
   4124     "::END;",
   4125 */
   4126     "", // test case commented out below, this is here to keep from messing up the indexes
   4127 
   4128     // [2]
   4129 /*
   4130     "abc > xy;"
   4131     "::BEGIN;"
   4132     "aba > z;"
   4133     "::END;",
   4134 */
   4135     "", // test case commented out below, this is here to keep from messing up the indexes
   4136 
   4137     // [3]
   4138 /*
   4139     "::BEGIN;"
   4140     "abc > xy;"
   4141     "::END;"
   4142     "aba > z;",
   4143 */
   4144     "", // test case commented out below, this is here to keep from messing up the indexes
   4145 
   4146     // [4]
   4147     "abc > xy;"
   4148     "::Null;"
   4149     "aba > z;",
   4150 
   4151     // [5]
   4152     "::Upper;"
   4153     "ABC > xy;"
   4154     "AB > x;"
   4155     "C > z;"
   4156     "::Upper;"
   4157     "XYZ > p;"
   4158     "XY > q;"
   4159     "Z > r;"
   4160     "::Upper;",
   4161 
   4162     // [6]
   4163     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4164     "$delim = [\\-$ws];"
   4165     "$ws $delim* > ' ';"
   4166     "'-' $delim* > '-';",
   4167 
   4168     // [7]
   4169     "::Null;"
   4170     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4171     "$delim = [\\-$ws];"
   4172     "$ws $delim* > ' ';"
   4173     "'-' $delim* > '-';",
   4174 
   4175     // [8]
   4176     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4177     "$delim = [\\-$ws];"
   4178     "$ws $delim* > ' ';"
   4179     "'-' $delim* > '-';"
   4180     "::Null;",
   4181 
   4182     // [9]
   4183     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4184     "$delim = [\\-$ws];"
   4185     "::Null;"
   4186     "$ws $delim* > ' ';"
   4187     "'-' $delim* > '-';",
   4188 
   4189     // [10]
   4190 /*
   4191     "::BEGIN;"
   4192     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4193     "$delim = [\\-$ws];"
   4194     "::END;"
   4195     "$ws $delim* > ' ';"
   4196     "'-' $delim* > '-';",
   4197 */
   4198     "", // test case commented out below, this is here to keep from messing up the indexes
   4199 
   4200     // [11]
   4201 /*
   4202     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4203     "$delim = [\\-$ws];"
   4204     "::BEGIN;"
   4205     "$ws $delim* > ' ';"
   4206     "'-' $delim* > '-';"
   4207     "::END;",
   4208 */
   4209     "", // test case commented out below, this is here to keep from messing up the indexes
   4210 
   4211     // [12]
   4212 /*
   4213     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4214     "$delim = [\\-$ws];"
   4215     "$ab = [ab];"
   4216     "::BEGIN;"
   4217     "$ws $delim* > ' ';"
   4218     "'-' $delim* > '-';"
   4219     "::END;"
   4220     "::BEGIN;"
   4221     "$ab { ' ' } $ab > '-';"
   4222     "c { ' ' > ;"
   4223     "::END;"
   4224     "::BEGIN;"
   4225     "'a-a' > a\\%|a;"
   4226     "::END;",
   4227 */
   4228     "", // test case commented out below, this is here to keep from messing up the indexes
   4229 
   4230     // [13]
   4231     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
   4232     "$delim = [\\-$ws];"
   4233     "$ab = [ab];"
   4234     "::Null;"
   4235     "$ws $delim* > ' ';"
   4236     "'-' $delim* > '-';"
   4237     "::Null;"
   4238     "$ab { ' ' } $ab > '-';"
   4239     "c { ' ' > ;"
   4240     "::Null;"
   4241     "'a-a' > a\\%|a;",
   4242 
   4243     // [14]
   4244 /*
   4245     "::[abc];"
   4246     "::BEGIN;"
   4247     "abc > xy;"
   4248     "::END;"
   4249     "::BEGIN;"
   4250     "aba > yz;"
   4251     "::END;"
   4252     "::Upper;",
   4253 */
   4254     "", // test case commented out below, this is here to keep from messing up the indexes
   4255 
   4256     // [15]
   4257     "::[abc];"
   4258     "abc > xy;"
   4259     "::Null;"
   4260     "aba > yz;"
   4261     "::Upper;",
   4262 
   4263     // [16]
   4264 /*
   4265     "::[abc];"
   4266     "::BEGIN;"
   4267     "abc <> xy;"
   4268     "::END;"
   4269     "::BEGIN;"
   4270     "aba <> yz;"
   4271     "::END;"
   4272     "::Upper(Lower);"
   4273     "::([XYZ]);"
   4274 */
   4275     "", // test case commented out below, this is here to keep from messing up the indexes
   4276 
   4277     // [17]
            // The only entry written with bidirectional (<>) rules; this is
            // the rule set the tests also build with UTRANS_REVERSE.
   4278     "::[abc];"
   4279     "abc <> xy;"
   4280     "::Null;"
   4281     "aba <> yz;"
   4282     "::Upper(Lower);"
   4283     "::([XYZ]);"
   4284 };
   4285 
   4286 /*
   4287 (This entire test is commented out below and will need some heavy revision when we re-add
   4288 the ::BEGIN/::END stuff)
   4289 static const char* BOGUS_BEGIN_END_RULES[] = {
   4290     // [7]
   4291     "::BEGIN;"
   4292     "abc > xy;"
   4293     "::BEGIN;"
   4294     "aba > z;"
   4295     "::END;"
   4296     "::END;",
   4297 
   4298     // [8]
   4299     "abc > xy;"
   4300     " aba > z;"
   4301     "::END;",
   4302 
   4303     // [9]
   4304     "::BEGIN;"
   4305     "::Upper;"
   4306     "::END;"
   4307 };
   4308 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
   4309 */
   4310 
   4311 static const char* BEGIN_END_TEST_CASES[] = {
            // Flat table of test cases, three consecutive slots per case:
            // the rule set (an entry of BEGIN_END_RULES), the input text and
            // the expected output.  TestBeginEnd() and TestBeginEndToRules()
            // walk it in steps of three.  Cases whose rule set is one of the
            // disabled ::BEGIN/::END entries are commented out.
   4312     // rules             input                   expected output
   4313     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
   4314 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
   4315 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
   4316 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
   4317     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
   4318     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
   4319 
   4320     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
   4321     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
   4322     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
   4323     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
   4324 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
   4325 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
   4326 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
   4327 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
   4328 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
   4329     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
   4330     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
   4331     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
   4332 
   4333 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4334     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4335 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
   4336     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
   4337 };
            // Number of slots (not cases) in the table; always a multiple of three.
   4338 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
   4339 
   4340 void TransliteratorTest::TestBeginEnd() {
   4341     // run through the list of test cases above
   4342     int32_t i = 0;
   4343     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4344         expect((UnicodeString)"Test case #" + (i / 3),
   4345                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4346                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4347                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4348     }
   4349 
   4350     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
   4351     UParseError parseError;
   4352     UErrorCode status = U_ZERO_ERROR;
   4353     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4354             UTRANS_REVERSE, parseError, status);
   4355     if (reversed == 0 || U_FAILURE(status)) {
   4356         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4357     } else {
   4358         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
   4359     }
   4360     delete reversed;
   4361 
   4362     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
   4363     // that all of them cause errors
   4364 /*
   4365 (commented out until we have the real ::BEGIN/::END stuff in place
   4366     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
   4367         UParseError parseError;
   4368         UErrorCode status = U_ZERO_ERROR;
   4369         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
   4370                 UTRANS_FORWARD, parseError, status);
   4371         if (!U_FAILURE(status)) {
   4372             delete t;
   4373             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
   4374         }
   4375     }
   4376 */
   4377 }
   4378 
   4379 void TransliteratorTest::TestBeginEndToRules() {
   4380     // run through the same list of test cases we used above, but this time, instead of just
   4381     // instantiating a Transliterator from the rules and running the test against it, we instantiate
   4382     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
   4383     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
   4384     // to (i.e., does the same thing as) the original rule set
   4385     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
   4386         UParseError parseError;
   4387         UErrorCode status = U_ZERO_ERROR;
   4388         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
   4389                 UTRANS_FORWARD, parseError, status);
   4390         if (U_FAILURE(status)) {
   4391             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
   4392         } else {
   4393             UnicodeString rules;
   4394             t->toRules(rules, TRUE);
   4395             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
   4396                     UTRANS_FORWARD, parseError, status);
   4397             if (U_FAILURE(status)) {
   4398                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
   4399                         parseError, status);
   4400                 delete t;
   4401             } else {
   4402                 expect(*t2,
   4403                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
   4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
   4405                 delete t;
   4406                 delete t2;
   4407             }
   4408         }
   4409     }
   4410 
   4411     // do the same thing for the reversible test case
   4412     UParseError parseError;
   4413     UErrorCode status = U_ZERO_ERROR;
   4414     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
   4415             UTRANS_REVERSE, parseError, status);
   4416     if (U_FAILURE(status)) {
   4417         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
   4418     } else {
   4419         UnicodeString rules;
   4420         reversed->toRules(rules, FALSE);
   4421         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
   4422                 parseError, status);
   4423         if (U_FAILURE(status)) {
   4424             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
   4425                     parseError, status);
   4426             delete reversed;
   4427         } else {
   4428             expect(*reversed2,
   4429                    UnicodeString("xy XY XYZ yz YZ"),
   4430                    UnicodeString("xy abc xaba yz aba"));
   4431             delete reversed;
   4432             delete reversed2;
   4433         }
   4434     }
   4435 }
   4436 
   4437 void TransliteratorTest::TestRegisterAlias() {
   4438     UnicodeString longID("Lower;[aeiou]Upper");
   4439     UnicodeString shortID("Any-CapVowels");
   4440     UnicodeString reallyShortID("CapVowels");
   4441 
   4442     Transliterator::registerAlias(shortID, longID);
   4443 
   4444     UErrorCode err = U_ZERO_ERROR;
   4445     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
   4446     if (U_FAILURE(err)) {
   4447         errln("Failed to instantiate transliterator with long ID");
   4448         Transliterator::unregister(shortID);
   4449         return;
   4450     }
   4451     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
   4452     if (U_FAILURE(err)) {
   4453         errln("Failed to instantiate transliterator with short ID");
   4454         delete t1;
   4455         Transliterator::unregister(shortID);
   4456         return;
   4457     }
   4458 
   4459     if (t1->getID() != longID)
   4460         errln("Transliterator instantiated with long ID doesn't have long ID");
   4461     if (t2->getID() != reallyShortID)
   4462         errln("Transliterator instantiated with short ID doesn't have short ID");
   4463 
   4464     UnicodeString rules1;
   4465     UnicodeString rules2;
   4466 
   4467     t1->toRules(rules1, TRUE);
   4468     t2->toRules(rules2, TRUE);
   4469     if (rules1 != rules2)
   4470         errln("Alias transliterators aren't the same");
   4471 
   4472     delete t1;
   4473     delete t2;
   4474     Transliterator::unregister(shortID);
   4475 
   4476     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
   4477     if (U_SUCCESS(err)) {
   4478         errln("Instantiation with short ID succeeded after short ID was unregistered");
   4479         delete t1;
   4480     }
   4481 
   4482     // try the same thing again, but this time with something other than
   4483     // an instance of CompoundTransliterator
   4484     UnicodeString realID("Latin-Greek");
   4485     UnicodeString fakeID("Latin-dlgkjdflkjdl");
   4486     Transliterator::registerAlias(fakeID, realID);
   4487 
   4488     err = U_ZERO_ERROR;
   4489     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
   4490     if (U_FAILURE(err)) {
   4491         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
   4492         Transliterator::unregister(realID);
   4493         return;
   4494     }
   4495     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
   4496     if (U_FAILURE(err)) {
   4497         errln("Failed to instantiate transliterator with fake ID");
   4498         delete t1;
   4499         Transliterator::unregister(realID);
   4500         return;
   4501     }
   4502 
   4503     t1->toRules(rules1, TRUE);
   4504     t2->toRules(rules2, TRUE);
   4505     if (rules1 != rules2)
   4506         errln("Alias transliterators aren't the same");
   4507 
   4508     delete t1;
   4509     delete t2;
   4510     Transliterator::unregister(fakeID);
   4511 }
   4512 
   4513 void TransliteratorTest::TestRuleStripping() {
   4514     /*
   4515 #
   4516 \uE001>\u0C01; # SIGN
   4517     */
   4518     static const UChar rule[] = {
   4519         0x0023,0x0020,0x000D,0x000A,
   4520         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
   4521     };
   4522     static const UChar expectedRule[] = {
   4523         0xE001,0x003E,0x0C01,0x003B,0
   4524     };
   4525     UChar result[sizeof(rule)/sizeof(rule[0])];
   4526     UErrorCode status = U_ZERO_ERROR;
   4527     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
   4528     if (len != u_strlen(expectedRule)) {
   4529         errln("utrans_stripRules return len = %d", len);
   4530     }
   4531     if (u_strncmp(expectedRule, result, len) != 0) {
   4532         errln("utrans_stripRules did not return expected string");
   4533     }
   4534 }
   4535 
   4536 /**
   4537  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
   4538  */
   4539 void TransliteratorTest::TestHalfwidthFullwidth(void) {
   4540     UParseError parseError;
   4541     UErrorCode status = U_ZERO_ERROR;
   4542     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
   4543     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
   4544     if (hf == 0 || fh == 0) {
   4545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4546         delete hf;
   4547         delete fh;
   4548         return;
   4549     }
   4550 
   4551     // Array of 2n items
   4552     // Each item is
   4553     //   "hf"|"fh"|"both",
   4554     //   <Halfwidth>,
   4555     //   <Fullwidth>
   4556     const char* DATA[] = {
   4557         "both",
   4558         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
   4559         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
   4560     };
   4561     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
   4562 
   4563     for (int32_t i=0; i<DATA_length; i+=3) {
   4564         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
   4565         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
   4566         switch (*DATA[i]) {
   4567         case 0x68: //'h': // Halfwidth-Fullwidth only
   4568             expect(*hf, h, f);
   4569             break;
   4570         case 0x66: //'f': // Fullwidth-Halfwidth only
   4571             expect(*fh, f, h);
   4572             break;
   4573         case 0x62: //'b': // both directions
   4574             expect(*hf, h, f);
   4575             expect(*fh, f, h);
   4576             break;
   4577         }
   4578     }
   4579     delete hf;
   4580     delete fh;
   4581 }
   4582 
   4583 
   4584     /**
   4585      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
   4586      *              TODO: confirm that the expected results are correct.
   4587      *              For now, test just confirms that C++ and Java give identical results.
   4588      */
   4589 void TransliteratorTest::TestThai(void) {
   4590 #if !UCONFIG_NO_BREAK_ITERATION
   4591     UParseError parseError;
   4592     UErrorCode status = U_ZERO_ERROR;
   4593     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
   4594     if (tr == 0) {
   4595         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
   4596         return;
   4597     }
   4598     if (U_FAILURE(status)) {
   4599         errln("FAIL: createInstance failed with %s", u_errorName(status));
   4600         return;
   4601     }
   4602     const char *thaiText =
   4603         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
   4604         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
   4605         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
   4606         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
   4607         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
   4608         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
   4609         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
   4610         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
   4611         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
   4612         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
   4613         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
   4614         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
   4615         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
   4616         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
   4617         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
   4618         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
   4619         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
   4620         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
   4621         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
   4622         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
   4623         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
   4624         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
   4625         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
   4626         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
   4627         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
   4628         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
   4629         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
   4630         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
   4631         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
   4632         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
   4633 
   4634     const char *latinText =
   4635         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
   4636         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
   4637         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
   4638         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
   4639         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
   4640         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
   4641         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
   4642         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
   4643         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
   4644         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
   4645         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
   4646         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
   4647         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
   4648         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
   4649         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
   4650         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
   4651         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
   4652         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
   4653 
   4654 
   4655     UnicodeString  xlitText(thaiText);
   4656     xlitText = xlitText.unescape();
   4657     tr->transliterate(xlitText);
   4658 
   4659     UnicodeString expectedText(latinText);
   4660     expectedText = expectedText.unescape();
   4661     expect(*tr, xlitText, expectedText);
   4662 
   4663     delete tr;
   4664 #endif
   4665 }
   4666 
   4667 
   4668 //======================================================================
   4669 // Support methods
   4670 //======================================================================
   4671 void TransliteratorTest::expectT(const UnicodeString& id,
   4672                                  const UnicodeString& source,
   4673                                  const UnicodeString& expectedResult) {
   4674     UErrorCode ec = U_ZERO_ERROR;
   4675     UParseError pe;
   4676     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
   4677     if (U_FAILURE(ec)) {
   4678         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
   4679         delete t;
   4680         return;
   4681     }
   4682     expect(*t, source, expectedResult);
   4683     delete t;
   4684 }
   4685 
   4686 void TransliteratorTest::reportParseError(const UnicodeString& message,
   4687                                           const UParseError& parseError,
   4688                                           const UErrorCode& status) {
   4689     dataerrln(message +
   4690           /*", parse error " + parseError.code +*/
   4691           ", line " + parseError.line +
   4692           ", offset " + parseError.offset +
   4693           ", pre-context " + prettify(parseError.preContext, TRUE) +
   4694           ", post-context " + prettify(parseError.postContext,TRUE) +
   4695           ", Error: " + u_errorName(status));
   4696 }
   4697 
   4698 void TransliteratorTest::expect(const UnicodeString& rules,
   4699                                 const UnicodeString& source,
   4700                                 const UnicodeString& expectedResult,
   4701                                 UTransPosition *pos) {
   4702     expect("<ID>", rules, source, expectedResult, pos);
   4703 }
   4704 
   4705 void TransliteratorTest::expect(const UnicodeString& id,
   4706                                 const UnicodeString& rules,
   4707                                 const UnicodeString& source,
   4708                                 const UnicodeString& expectedResult,
   4709                                 UTransPosition *pos) {
   4710     UErrorCode status = U_ZERO_ERROR;
   4711     UParseError parseError;
   4712     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
   4713     if (U_FAILURE(status)) {
   4714         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
   4715     } else {
   4716         expect(*t, source, expectedResult, pos);
   4717     }
   4718     delete t;
   4719 }
   4720 
   4721 void TransliteratorTest::expect(const Transliterator& t,
   4722                                 const UnicodeString& source,
   4723                                 const UnicodeString& expectedResult,
   4724                                 const Transliterator& reverseTransliterator) {
   4725     expect(t, source, expectedResult);
   4726     expect(reverseTransliterator, expectedResult, source);
   4727 }
   4728 
   4729 void TransliteratorTest::expect(const Transliterator& t,
   4730                                 const UnicodeString& source,
   4731                                 const UnicodeString& expectedResult,
   4732                                 UTransPosition *pos) {
   4733     if (pos == 0) {
   4734         UnicodeString result(source);
   4735         t.transliterate(result);
   4736         expectAux(t.getID() + ":String", source, result, expectedResult);
   4737     }
   4738     UTransPosition index={0, 0, 0, 0};
   4739     if (pos != 0) {
   4740         index = *pos;
   4741     }
   4742 
   4743     UnicodeString rsource(source);
   4744     if (pos == 0) {
   4745         t.transliterate(rsource);
   4746     } else {
   4747         // Do it all at once -- below we do it incrementally
   4748         t.finishTransliteration(rsource, *pos);
   4749     }
   4750     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
   4751 
   4752     // Test keyboard (incremental) transliteration -- this result
   4753     // must be the same after we finalize (see below).
   4754     UnicodeString log;
   4755     rsource.remove();
   4756     if (pos != 0) {
   4757         rsource = source;
   4758         formatInput(log, rsource, index);
   4759         log.append(" -> ");
   4760         UErrorCode status = U_ZERO_ERROR;
   4761         t.transliterate(rsource, index, status);
   4762         formatInput(log, rsource, index);
   4763     } else {
   4764         for (int32_t i=0; i<source.length(); ++i) {
   4765             if (i != 0) {
   4766                 log.append(" + ");
   4767             }
   4768             log.append(source.charAt(i)).append(" -> ");
   4769             UErrorCode status = U_ZERO_ERROR;
   4770             t.transliterate(rsource, index, source.charAt(i), status);
   4771             formatInput(log, rsource, index);
   4772         }
   4773     }
   4774 
   4775     // As a final step in keyboard transliteration, we must call
   4776     // transliterate to finish off any pending partial matches that
   4777     // were waiting for more input.
   4778     t.finishTransliteration(rsource, index);
   4779     log.append(" => ").append(rsource);
   4780 
   4781     expectAux(t.getID() + ":Keyboard", log,
   4782               rsource == expectedResult,
   4783               expectedResult);
   4784 }
   4785 
   4786 
   4787 /**
   4788  * @param appendTo result is appended to this param.
   4789  * @param input the string being transliterated
   4790  * @param pos the index struct
   4791  */
   4792 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
   4793                                                const UnicodeString& input,
   4794                                                const UTransPosition& pos) {
   4795     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
   4796     // the {} indicate the context start and limit, and the ||
   4797     // indicate the start and limit.
   4798     if (0 <= pos.contextStart &&
   4799         pos.contextStart <= pos.start &&
   4800         pos.start <= pos.limit &&
   4801         pos.limit <= pos.contextLimit &&
   4802         pos.contextLimit <= input.length()) {
   4803 
   4804         UnicodeString a, b, c, d, e;
   4805         input.extractBetween(0, pos.contextStart, a);
   4806         input.extractBetween(pos.contextStart, pos.start, b);
   4807         input.extractBetween(pos.start, pos.limit, c);
   4808         input.extractBetween(pos.limit, pos.contextLimit, d);
   4809         input.extractBetween(pos.contextLimit, input.length(), e);
   4810         appendTo.append(a).append((UChar)123/*{*/).append(b).
   4811             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
   4812             append((UChar)125/*}*/).append(e);
   4813     } else {
   4814         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
   4815                         pos.contextStart + ", s=" + pos.start + ", l=" +
   4816                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
   4817                         input);
   4818     }
   4819     return appendTo;
   4820 }
   4821 
   4822 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4823                                    const UnicodeString& source,
   4824                                    const UnicodeString& result,
   4825                                    const UnicodeString& expectedResult) {
   4826     expectAux(tag, source + " -> " + result,
   4827               result == expectedResult,
   4828               expectedResult);
   4829 }
   4830 
   4831 void TransliteratorTest::expectAux(const UnicodeString& tag,
   4832                                    const UnicodeString& summary, UBool pass,
   4833                                    const UnicodeString& expectedResult) {
   4834     if (pass) {
   4835         logln(UnicodeString("(")+tag+") " + prettify(summary));
   4836     } else {
   4837         dataerrln(UnicodeString("FAIL: (")+tag+") "
   4838               + prettify(summary)
   4839               + ", expected " + prettify(expectedResult));
   4840     }
   4841 }
   4842 
   4843 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   4844