Home | History | Annotate | Download | only in ustring
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ustring.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2000aug15
     14 *   created by: Markus W. Scherer
     15 *
     16 *   This file contains sample code that illustrates the use of Unicode strings
     17 *   with ICU.
     18 */
     19 
     20 #include <stdio.h>
     21 #include "unicode/utypes.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/locid.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/ucnv.h"
     26 #include "unicode/unistr.h"
     27 
     28 // helper functions -------------------------------------------------------- ***
     29 
     30 // default converter for the platform encoding
     31 static UConverter *cnv=NULL;
     32 
     33 static void
     34 printUString(const char *announce, const UChar *s, int32_t length) {
     35     static char out[200];
     36     UChar32 c;
     37     int32_t i;
     38     UErrorCode errorCode=U_ZERO_ERROR;
     39 
     40     /*
     41      * Convert to the "platform encoding". See notes in printUnicodeString().
     42      * ucnv_fromUChars(), like most ICU APIs understands length==-1
     43      * to mean that the string is NUL-terminated.
     44      */
     45     ucnv_fromUChars(cnv, out, sizeof(out), s, length, &errorCode);
     46     if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) {
     47         printf("%sproblem converting string from Unicode: %s\n", announce, u_errorName(errorCode));
     48         return;
     49     }
     50 
     51     printf("%s%s {", announce, out);
     52 
     53     /* output the code points (not code units) */
     54     if(length>=0) {
     55         /* s is not NUL-terminated */
     56         for(i=0; i<length; /* U16_NEXT post-increments */) {
     57             U16_NEXT(s, i, length, c);
     58             printf(" %04x", c);
     59         }
     60     } else {
     61         /* s is NUL-terminated */
     62         for(i=0; /* condition in loop body */; /* U16_NEXT post-increments */) {
     63             U16_NEXT(s, i, length, c);
     64             if(c==0) {
     65                 break;
     66             }
     67             printf(" %04x", c);
     68         }
     69     }
     70     printf(" }\n");
     71 }
     72 
     73 static void
     74 printUnicodeString(const char *announce, const UnicodeString &s) {
     75     static char out[200];
     76     int32_t i, length;
     77 
     78     // output the string, converted to the platform encoding
     79 
     80     // Note for Windows: The "platform encoding" defaults to the "ANSI codepage",
     81     // which is different from the "OEM codepage" in the console window.
     82     // However, if you pipe the output into a file and look at it with Notepad
     83     // or similar, then "ANSI" characters will show correctly.
     84     // Production code should be aware of what encoding is required,
     85     // and use a UConverter or at least a charset name explicitly.
     86     out[s.extract(0, 99, out)]=0;
     87     printf("%s%s {", announce, out);
     88 
     89     // output the code units (not code points)
     90     length=s.length();
     91     for(i=0; i<length; ++i) {
     92         printf(" %04x", s.charAt(i));
     93     }
     94     printf(" }\n");
     95 }
     96 
     97 // sample code for utf.h macros -------------------------------------------- ***
     98 
     99 static void
    100 demo_utf_h_macros() {
    101     static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 };
    102     UChar32 c;
    103     int32_t i;
    104     UBool isError;
    105 
    106     printf("\n* demo_utf_h_macros() -------------- ***\n\n");
    107 
    108     printUString("iterate forward through: ", input, UPRV_LENGTHOF(input));
    109     for(i=0; i<UPRV_LENGTHOF(input); /* U16_NEXT post-increments */) {
    110         /* Iterating forwards
    111            Codepoint at offset 0: U+0061
    112            Codepoint at offset 1: U+10000
    113            Codepoint at offset 3: U+10ffff
    114            Codepoint at offset 5: U+0062
    115         */
    116         printf("Codepoint at offset %d: U+", i);
    117         U16_NEXT(input, i, UPRV_LENGTHOF(input), c);
    118         printf("%04x\n", c);
    119     }
    120 
    121     puts("");
    122 
    123     isError=FALSE;
    124     i=1; /* write position, gets post-incremented so needs to be in an l-value */
    125     U16_APPEND(input, i, UPRV_LENGTHOF(input), 0x0062, isError);
    126 
    127     printUString("iterate backward through: ", input, UPRV_LENGTHOF(input));
    128     for(i=UPRV_LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) {
    129         U16_PREV(input, 0, i, c);
    130         /* Iterating backwards
    131            Codepoint at offset 5: U+0062
    132            Codepoint at offset 3: U+10ffff
    133            Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten
    134            Codepoint at offset 1: U+0062 -- by this BMP code point
    135            Codepoint at offset 0: U+0061
    136         */
    137         printf("Codepoint at offset %d: U+%04x\n", i, c);
    138     }
    139 }
    140 
    141 // sample code for Unicode strings in C ------------------------------------ ***
    142 
    143 static void demo_C_Unicode_strings() {
    144     printf("\n* demo_C_Unicode_strings() --------- ***\n\n");
    145 
    146     static const UChar text[]={ 0x41, 0x42, 0x43, 0 };          /* "ABC" */
    147     static const UChar appendText[]={ 0x61, 0x62, 0x63, 0 };    /* "abc" */
    148     static const UChar cmpText[]={ 0x61, 0x53, 0x73, 0x43, 0 }; /* "aSsC" */
    149     UChar buffer[32];
    150     int32_t compare;
    151     int32_t length=u_strlen(text); /* length=3 */
    152 
    153     /* simple ANSI C-style functions */
    154     buffer[0]=0;                    /* empty, NUL-terminated string */
    155     u_strncat(buffer, text, 1);     /* append just n=1 character ('A') */
    156     u_strcat(buffer, appendText);   /* buffer=="Aabc" */
    157     length=u_strlen(buffer);        /* length=4 */
    158     printUString("should be \"Aabc\": ", buffer, -1);
    159 
    160     /* bitwise comparing buffer with text */
    161     compare=u_strcmp(buffer, text);
    162     if(compare<=0) {
    163         printf("String comparison error, expected \"Aabc\" > \"ABC\"\n");
    164     }
    165 
    166     /* Build "A<sharp s>C" in the buffer... */
    167     u_strcpy(buffer, text);
    168     buffer[1]=0xdf; /* sharp s, case-compares equal to "ss" */
    169     printUString("should be \"A<sharp s>C\": ", buffer, -1);
    170 
    171     /* Compare two strings case-insensitively using full case folding */
    172     compare=u_strcasecmp(buffer, cmpText, U_FOLD_CASE_DEFAULT);
    173     if(compare!=0) {
    174         printf("String case insensitive comparison error, expected \"AbC\" to be equal to \"ABC\"\n");
    175     }
    176 }
    177 
    178 // sample code for case mappings with C APIs -------------------------------- ***
    179 
    180 static void demoCaseMapInC() {
    181     /*
    182      * input=
    183      *   "aB<capital sigma>"
    184      *   "iI<small dotless i><capital dotted I> "
    185      *   "<sharp s> <small lig. ffi>"
    186      *   "<small final sigma><small sigma><capital sigma>"
    187      */
    188     static const UChar input[]={
    189         0x61, 0x42, 0x3a3,
    190         0x69, 0x49, 0x131, 0x130, 0x20,
    191         0xdf, 0x20, 0xfb03,
    192         0x3c2, 0x3c3, 0x3a3, 0
    193     };
    194     UChar buffer[32];
    195 
    196     UErrorCode errorCode;
    197     UChar32 c;
    198     int32_t i, j, length;
    199     UBool isError;
    200 
    201     printf("\n* demoCaseMapInC() ----------------- ***\n\n");
    202 
    203     /*
    204      * First, use simple case mapping functions which provide
    205      * 1:1 code point mappings without context/locale ID.
    206      *
    207      * Note that some mappings will not be "right" because some "real"
    208      * case mappings require context, depend on the locale ID,
    209      * and/or result in a change in the number of code points.
    210      */
    211     printUString("input string: ", input, -1);
    212 
    213     /* uppercase */
    214     isError=FALSE;
    215     for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
    216         U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
    217         if(c==0) {
    218             break; /* stop at terminating NUL, no need to terminate buffer */
    219         }
    220         c=u_toupper(c);
    221         U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    222     }
    223     printUString("simple-uppercased: ", buffer, j);
    224     /* lowercase */
    225     isError=FALSE;
    226     for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
    227         U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
    228         if(c==0) {
    229             break; /* stop at terminating NUL, no need to terminate buffer */
    230         }
    231         c=u_tolower(c);
    232         U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    233     }
    234     printUString("simple-lowercased: ", buffer, j);
    235     /* titlecase */
    236     isError=FALSE;
    237     for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
    238         U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
    239         if(c==0) {
    240             break; /* stop at terminating NUL, no need to terminate buffer */
    241         }
    242         c=u_totitle(c);
    243         U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    244     }
    245     printUString("simple-titlecased: ", buffer, j);
    246     /* case-fold/default */
    247     isError=FALSE;
    248     for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
    249         U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
    250         if(c==0) {
    251             break; /* stop at terminating NUL, no need to terminate buffer */
    252         }
    253         c=u_foldCase(c, U_FOLD_CASE_DEFAULT);
    254         U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    255     }
    256     printUString("simple-case-folded/default: ", buffer, j);
    257     /* case-fold/Turkic */
    258     isError=FALSE;
    259     for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
    260         U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
    261         if(c==0) {
    262             break; /* stop at terminating NUL, no need to terminate buffer */
    263         }
    264         c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
    265         U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
    266     }
    267     printUString("simple-case-folded/Turkic: ", buffer, j);
    268 
    269     /*
    270      * Second, use full case mapping functions which provide
    271      * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID.
    272      *
    273      * Note that lower/upper/titlecasing take a locale ID while case-folding
    274      * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file.
    275      *
    276      * Also, string titlecasing requires a BreakIterator to find starts of words.
    277      * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default
    278      * titlecasing BreakIterator automatically.
    279      * For production code where many strings are titlecased it would be more efficient
    280      * to open a BreakIterator externally and pass it in.
    281      */
    282     printUString("\ninput string: ", input, -1);
    283 
    284     /* lowercase/English */
    285     errorCode=U_ZERO_ERROR;
    286     length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    287     if(U_SUCCESS(errorCode)) {
    288         printUString("full-lowercased/en: ", buffer, length);
    289     } else {
    290         printf("error in u_strToLower(en)=%ld error=%s\n", length, u_errorName(errorCode));
    291     }
    292     /* lowercase/Turkish */
    293     errorCode=U_ZERO_ERROR;
    294     length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    295     if(U_SUCCESS(errorCode)) {
    296         printUString("full-lowercased/tr: ", buffer, length);
    297     } else {
    298         printf("error in u_strToLower(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    299     }
    300     /* uppercase/English */
    301     errorCode=U_ZERO_ERROR;
    302     length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
    303     if(U_SUCCESS(errorCode)) {
    304         printUString("full-uppercased/en: ", buffer, length);
    305     } else {
    306         printf("error in u_strToUpper(en)=%ld error=%s\n", length, u_errorName(errorCode));
    307     }
    308     /* uppercase/Turkish */
    309     errorCode=U_ZERO_ERROR;
    310     length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
    311     if(U_SUCCESS(errorCode)) {
    312         printUString("full-uppercased/tr: ", buffer, length);
    313     } else {
    314         printf("error in u_strToUpper(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    315     }
    316     /* titlecase/English */
    317     errorCode=U_ZERO_ERROR;
    318     length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode);
    319     if(U_SUCCESS(errorCode)) {
    320         printUString("full-titlecased/en: ", buffer, length);
    321     } else {
    322         printf("error in u_strToTitle(en)=%ld error=%s\n", length, u_errorName(errorCode));
    323     }
    324     /* titlecase/Turkish */
    325     errorCode=U_ZERO_ERROR;
    326     length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode);
    327     if(U_SUCCESS(errorCode)) {
    328         printUString("full-titlecased/tr: ", buffer, length);
    329     } else {
    330         printf("error in u_strToTitle(tr)=%ld error=%s\n", length, u_errorName(errorCode));
    331     }
    332     /* case-fold/default */
    333     errorCode=U_ZERO_ERROR;
    334     length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode);
    335     if(U_SUCCESS(errorCode)) {
    336         printUString("full-case-folded/default: ", buffer, length);
    337     } else {
    338         printf("error in u_strFoldCase(default)=%ld error=%s\n", length, u_errorName(errorCode));
    339     }
    340     /* case-fold/Turkic */
    341     errorCode=U_ZERO_ERROR;
    342     length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
    343     if(U_SUCCESS(errorCode)) {
    344         printUString("full-case-folded/Turkic: ", buffer, length);
    345     } else {
    346         printf("error in u_strFoldCase(Turkic)=%ld error=%s\n", length, u_errorName(errorCode));
    347     }
    348 }
    349 
    350 // sample code for case mappings with C++ APIs ------------------------------ ***
    351 
    352 static void demoCaseMapInCPlusPlus() {
    353     /*
    354      * input=
    355      *   "aB<capital sigma>"
    356      *   "iI<small dotless i><capital dotted I> "
    357      *   "<sharp s> <small lig. ffi>"
    358      *   "<small final sigma><small sigma><capital sigma>"
    359      */
    360     static const UChar input[]={
    361         0x61, 0x42, 0x3a3,
    362         0x69, 0x49, 0x131, 0x130, 0x20,
    363         0xdf, 0x20, 0xfb03,
    364         0x3c2, 0x3c3, 0x3a3, 0
    365     };
    366 
    367     printf("\n* demoCaseMapInCPlusPlus() --------- ***\n\n");
    368 
    369     UnicodeString s(input), t;
    370     const Locale &en=Locale::getEnglish();
    371     Locale tr("tr");
    372 
    373     /*
    374      * Full case mappings as in demoCaseMapInC(), using UnicodeString functions.
    375      * These functions modify the string object itself.
    376      * Since we want to keep the input string around, we copy it each time
    377      * and case-map the copy.
    378      */
    379     printUnicodeString("input string: ", s);
    380 
    381     /* lowercase/English */
    382     printUnicodeString("full-lowercased/en: ", (t=s).toLower(en));
    383     /* lowercase/Turkish */
    384     printUnicodeString("full-lowercased/tr: ", (t=s).toLower(tr));
    385     /* uppercase/English */
    386     printUnicodeString("full-uppercased/en: ", (t=s).toUpper(en));
    387     /* uppercase/Turkish */
    388     printUnicodeString("full-uppercased/tr: ", (t=s).toUpper(tr));
    389     /* titlecase/English */
    390     printUnicodeString("full-titlecased/en: ", (t=s).toTitle(NULL, en));
    391     /* titlecase/Turkish */
    392     printUnicodeString("full-titlecased/tr: ", (t=s).toTitle(NULL, tr));
    393     /* case-folde/default */
    394     printUnicodeString("full-case-folded/default: ", (t=s).foldCase(U_FOLD_CASE_DEFAULT));
    395     /* case-folde/Turkic */
    396     printUnicodeString("full-case-folded/Turkic: ", (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I));
    397 }
    398 
    399 // sample code for UnicodeString storage models ----------------------------- ***
    400 
    401 static const UChar readonly[]={
    402     0x61, 0x31, 0x20ac
    403 };
    404 static UChar writeable[]={
    405     0x62, 0x32, 0xdbc0, 0xdc01 // includes a surrogate pair for a supplementary code point
    406 };
    407 static char out[100];
    408 
    409 static void
    410 demoUnicodeStringStorage() {
    411     // These sample code lines illustrate how to use UnicodeString, and the
    412     // comments tell what happens internally. There are no APIs to observe
    413     // most of this programmatically, except for stepping into the code
    414     // with a debugger.
    415     // This is by design to hide such details from the user.
    416     int32_t i;
    417 
    418     printf("\n* demoUnicodeStringStorage() ------- ***\n\n");
    419 
    420     // * UnicodeString with internally stored contents
    421     // instantiate a UnicodeString from a single code point
    422     // the few (2) UChars will be stored in the object itself
    423     UnicodeString one((UChar32)0x24001);
    424     // this copies the few UChars into the "two" object
    425     UnicodeString two=one;
    426     printf("length of short string copy: %d\n", two.length());
    427     // set "one" to contain the 3 UChars from readonly
    428     // this setTo() variant copies the characters
    429     one.setTo(readonly, UPRV_LENGTHOF(readonly));
    430 
    431     // * UnicodeString with allocated contents
    432     // build a longer string that will not fit into the object's buffer
    433     one+=UnicodeString(writeable, UPRV_LENGTHOF(writeable));
    434     one+=one;
    435     one+=one;
    436     printf("length of longer string: %d\n", one.length());
    437     // copying will use the same allocated buffer and increment the reference
    438     // counter
    439     two=one;
    440     printf("length of longer string copy: %d\n", two.length());
    441 
    442     // * UnicodeString using readonly-alias to a const UChar array
    443     // construct a string that aliases a readonly buffer
    444     UnicodeString three(FALSE, readonly, UPRV_LENGTHOF(readonly));
    445     printUnicodeString("readonly-alias string: ", three);
    446     // copy-on-write: any modification to the string results in
    447     // a copy to either the internal buffer or to a newly allocated one
    448     three.setCharAt(1, 0x39);
    449     printUnicodeString("readonly-aliasing string after modification: ", three);
    450     // the aliased array is not modified
    451     for(i=0; i<three.length(); ++i) {
    452         printf("readonly buffer[%d] after modifying its string: 0x%lx\n",
    453                i, readonly[i]);
    454     }
    455     // setTo() readonly alias
    456     one.setTo(FALSE, writeable, UPRV_LENGTHOF(writeable));
    457     // copying the readonly-alias object with fastCopyFrom() (new in ICU 2.4)
    458     // will readonly-alias the same buffer
    459     two.fastCopyFrom(one);
    460     printUnicodeString("fastCopyFrom(readonly alias of \"writeable\" array): ", two);
    461     printf("verify that a fastCopyFrom(readonly alias) uses the same buffer pointer: %d (should be 1)\n",
    462         one.getBuffer()==two.getBuffer());
    463     // a normal assignment will clone the contents (new in ICU 2.4)
    464     two=one;
    465     printf("verify that a regular copy of a readonly alias uses a different buffer pointer: %d (should be 0)\n",
    466         one.getBuffer()==two.getBuffer());
    467 
    468     // * UnicodeString using writeable-alias to a non-const UChar array
    469     UnicodeString four(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable));
    470     printUnicodeString("writeable-alias string: ", four);
    471     // a modification writes through to the buffer
    472     four.setCharAt(1, 0x39);
    473     for(i=0; i<four.length(); ++i) {
    474         printf("writeable-alias backing buffer[%d]=0x%lx "
    475                "after modification\n", i, writeable[i]);
    476     }
    477     // a copy will not alias any more;
    478     // instead, it will get a copy of the contents into allocated memory
    479     two=four;
    480     two.setCharAt(1, 0x21);
    481     for(i=0; i<two.length(); ++i) {
    482         printf("writeable-alias backing buffer[%d]=0x%lx after "
    483                "modification of string copy\n", i, writeable[i]);
    484     }
    485     // setTo() writeable alias, capacity==length
    486     one.setTo(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable));
    487     // grow the string - it will not fit into the backing buffer any more
    488     // and will get copied before modification
    489     one.append((UChar)0x40);
    490     // shrink it back so it would fit
    491     one.truncate(one.length()-1);
    492     // we still operate on the copy
    493     one.setCharAt(1, 0x25);
    494     printf("string after growing too much and then shrinking[1]=0x%lx\n"
    495            "                          backing store for this[1]=0x%lx\n",
    496            one.charAt(1), writeable[1]);
    497     // if we need it in the original buffer, then extract() to it
    498     // extract() does not do anything if the string aliases that same buffer
    499     // i=min(one.length(), length of array)
    500     if(one.length()<UPRV_LENGTHOF(writeable)) {
    501         i=one.length();
    502     } else {
    503         i=UPRV_LENGTHOF(writeable);
    504     }
    505     one.extract(0, i, writeable);
    506     for(i=0; i<UPRV_LENGTHOF(writeable); ++i) {
    507         printf("writeable-alias backing buffer[%d]=0x%lx after re-extract\n",
    508                i, writeable[i]);
    509     }
    510 }
    511 
    512 // sample code for UnicodeString instantiations ----------------------------- ***
    513 
    514 static void
    515 demoUnicodeStringInit() {
    516     // *** Make sure to read about invariant characters in utypes.h! ***
    517     // Initialization of Unicode strings from C literals works _only_ for
    518     // invariant characters!
    519 
    520     printf("\n* demoUnicodeStringInit() ---------- ***\n\n");
    521 
    522     // the string literal is 32 chars long - this must be counted for the macro
    523     UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32);
    524 
    525     /*
    526      * In C, we need two macros: one to declare the UChar[] array, and
    527      * one to populate it; the second one is a noop on platforms where
    528      * wchar_t is compatible with UChar and ASCII-based.
    529      * The length of the string literal must be counted for both macros.
    530      */
    531     /* declare the invString array for the string */
    532     U_STRING_DECL(invString, "such characters are safe 123 %-.", 32);
    533     /* populate it with the characters */
    534     U_STRING_INIT(invString, "such characters are safe 123 %-.", 32);
    535 
    536     // compare the C and C++ strings
    537     printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32));
    538 
    539     /*
    540      * convert between char * and UChar * strings that
    541      * contain only invariant characters
    542      */
    543     static const char *cs1="such characters are safe 123 %-.";
    544     static UChar us1[40];
    545     static char cs2[40];
    546     u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */
    547     u_UCharsToChars(us1, cs2, 33);
    548     printf("char * -> UChar * -> char * with only "
    549            "invariant characters: \"%s\"\n",
    550            cs2);
    551 
    552     // initialize a UnicodeString from a string literal that contains
    553     // escape sequences written with invariant characters
    554     // do not forget to duplicate the backslashes for ICU to see them
    555     // then, count each double backslash only once!
    556     UnicodeString german=UNICODE_STRING(
    557         "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64).
    558         unescape();
    559     printUnicodeString("german UnicodeString from unescaping:\n    ", german);
    560 
    561     /*
    562      * C: convert and unescape a char * string with only invariant
    563      * characters to fill a UChar * string
    564      */
    565     UChar buffer[200];
    566     int32_t length;
    567     length=u_unescape(
    568         "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
    569         buffer, UPRV_LENGTHOF(buffer));
    570     printf("german C Unicode string from char * unescaping: (length %d)\n    ", length);
    571     printUnicodeString("", UnicodeString(buffer));
    572 }
    573 
    574 extern int
    575 main(int argc, const char *argv[]) {
    576     UErrorCode errorCode=U_ZERO_ERROR;
    577 
    578     // Note: Using a global variable for any object is not exactly thread-safe...
    579 
    580     // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe
    581     // the output to a file and look at it with a Unicode-capable editor.
    582     // This will currently affect only the printUString() function, see the code above.
    583     // printUnicodeString() could use this, too, by changing to an extract() overload
    584     // that takes a UConverter argument.
    585     cnv=ucnv_open(NULL, &errorCode);
    586     if(U_FAILURE(errorCode)) {
    587         fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode));
    588         return errorCode;
    589     }
    590 
    591     ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode);
    592     if(U_FAILURE(errorCode)) {
    593         fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode));
    594         ucnv_close(cnv);
    595         return errorCode;
    596     }
    597 
    598     demo_utf_h_macros();
    599     demo_C_Unicode_strings();
    600     demoCaseMapInC();
    601     demoCaseMapInCPlusPlus();
    602     demoUnicodeStringStorage();
    603     demoUnicodeStringInit();
    604 
    605     ucnv_close(cnv);
    606     return 0;
    607 }
    608