Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  uinvchar.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:2
     12 *
     13 *   created on: 2004sep14
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Functions for handling invariant characters, moved here from putil.c
     17 *   for better modularization.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "udataswp.h"
     23 #include "cstring.h"
     24 #include "cmemory.h"
     25 #include "uassert.h"
     26 #include "uinvchar.h"
     27 
     28 /* invariant-character handling --------------------------------------------- */
     29 
     30 /*
     31  * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
     32  * appropriately for most EBCDIC codepages.
     33  *
     34  * They currently also map most other ASCII graphic characters,
     35  * appropriately for codepages 37 and 1047.
     36  * Exceptions: The characters for []^ have different codes in 37 & 1047.
     37  * Both versions are mapped to ASCII.
     38  *
     39  *    ASCII 37 1047
     40  * [     5B BA   AD
     41  * ]     5D BB   BD
     42  * ^     5E B0   5F
     43  *
     44  * There are no mappings for variant characters from Unicode to EBCDIC.
     45  *
     46  * Currently, C0 control codes are also included in these maps.
     47  * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
     48  * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
     49  * but there is no mapping for ASCII LF back to EBCDIC.
     50  *
     51  *    ASCII EBCDIC S/390-OE
     52  * LF    0A     25       15
     53  * NEL   85     15       25
     54  *
     55  * The maps below explicitly exclude the variant
     56  * control and graphical characters that are in ASCII-based
     57  * codepages at 0x80 and above.
     58  * "No mapping" is expressed by mapping to a 00 byte.
     59  *
     60  * These tables do not establish a converter or a codepage.
     61  */
     62 
     63 static const uint8_t asciiFromEbcdic[256]={
     64     0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     65     0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
     66     0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
     67     0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
     68 
     69     0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
     70     0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
     71     0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
     72     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
     73 
     74     0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     75     0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     76     0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
     77     0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
     78 
     79     0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     80     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     81     0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     82     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
     83 };
     84 
     85 static const uint8_t ebcdicFromAscii[256]={
     86     0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
     87     0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
     88     0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
     89     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
     90 
     91     0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
     92     0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
     93     0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
     94     0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
     95 
     96     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     97     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     98     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     99     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    100 
    101     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    102     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    103     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    104     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    105 };
    106 
    107 /* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
    108 static const uint8_t lowercaseAsciiFromEbcdic[256]={
    109     0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    110     0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    111     0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    112     0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
    113 
    114     0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    115     0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    116     0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    117     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    118 
    119     0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    120     0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    121     0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    122     0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
    123 
    124     0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    125     0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    126     0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    127     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
    128 };
    129 
    130 /*
    131  * Bit sets indicating which characters of the ASCII repertoire
    132  * (by ASCII/Unicode code) are "invariant".
    133  * See utypes.h for more details.
    134  *
    135  * As invariant are considered the characters of the ASCII repertoire except
    136  * for the following:
    137  * 21  '!' <exclamation mark>
    138  * 23  '#' <number sign>
    139  * 24  '$' <dollar sign>
    140  *
    141  * 40  '@' <commercial at>
    142  *
    143  * 5b  '[' <left bracket>
    144  * 5c  '\' <backslash>
    145  * 5d  ']' <right bracket>
    146  * 5e  '^' <circumflex>
    147  *
    148  * 60  '`' <grave accent>
    149  *
    150  * 7b  '{' <left brace>
    151  * 7c  '|' <vertical line>
    152  * 7d  '}' <right brace>
    153  * 7e  '~' <tilde>
    154  */
    155 static const uint32_t invariantChars[4]={
    156     0xfffffbff, /* 00..1f but not 0a */
    157     0xffffffe5, /* 20..3f but not 21 23 24 */
    158     0x87fffffe, /* 40..5f but not 40 5b..5e */
    159     0x87fffffe  /* 60..7f but not 60 7b..7e */
    160 };
    161 
    162 /*
    163  * test unsigned types (or values known to be non-negative) for invariant characters,
    164  * tests ASCII-family character values
    165  */
    166 #define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
    167 
    168 /* test signed types for invariant characters, adds test for positive values */
    169 #define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
    170 
    171 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    172 #define CHAR_TO_UCHAR(c) c
    173 #define UCHAR_TO_CHAR(c) c
    174 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    175 #define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
    176 #define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
    177 #else
    178 #   error U_CHARSET_FAMILY is not valid
    179 #endif
    180 
    181 
    182 U_CAPI void U_EXPORT2
    183 u_charsToUChars(const char *cs, UChar *us, int32_t length) {
    184     UChar u;
    185     uint8_t c;
    186 
    187     /*
    188      * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
    189      * For EBCDIC systems, this works for characters with codes from
    190      * codepages 37 and 1047 or compatible.
    191      */
    192     while(length>0) {
    193         c=(uint8_t)(*cs++);
    194         u=(UChar)CHAR_TO_UCHAR(c);
    195         U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
    196         *us++=u;
    197         --length;
    198     }
    199 }
    200 
    201 U_CAPI void U_EXPORT2
    202 u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
    203     UChar u;
    204 
    205     while(length>0) {
    206         u=*us++;
    207         if(!UCHAR_IS_INVARIANT(u)) {
    208             U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
    209             u=0;
    210         }
    211         *cs++=(char)UCHAR_TO_CHAR(u);
    212         --length;
    213     }
    214 }
    215 
    216 U_CAPI UBool U_EXPORT2
    217 uprv_isInvariantString(const char *s, int32_t length) {
    218     uint8_t c;
    219 
    220     for(;;) {
    221         if(length<0) {
    222             /* NUL-terminated */
    223             c=(uint8_t)*s++;
    224             if(c==0) {
    225                 break;
    226             }
    227         } else {
    228             /* count length */
    229             if(length==0) {
    230                 break;
    231             }
    232             --length;
    233             c=(uint8_t)*s++;
    234             if(c==0) {
    235                 continue; /* NUL is invariant */
    236             }
    237         }
    238         /* c!=0 now, one branch below checks c==0 for variant characters */
    239 
    240         /*
    241          * no assertions here because these functions are legitimately called
    242          * for strings with variant characters
    243          */
    244 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    245         if(!UCHAR_IS_INVARIANT(c)) {
    246             return FALSE; /* found a variant char */
    247         }
    248 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    249         c=CHAR_TO_UCHAR(c);
    250         if(c==0 || !UCHAR_IS_INVARIANT(c)) {
    251             return FALSE; /* found a variant char */
    252         }
    253 #else
    254 #   error U_CHARSET_FAMILY is not valid
    255 #endif
    256     }
    257     return TRUE;
    258 }
    259 
    260 U_CAPI UBool U_EXPORT2
    261 uprv_isInvariantUString(const UChar *s, int32_t length) {
    262     UChar c;
    263 
    264     for(;;) {
    265         if(length<0) {
    266             /* NUL-terminated */
    267             c=*s++;
    268             if(c==0) {
    269                 break;
    270             }
    271         } else {
    272             /* count length */
    273             if(length==0) {
    274                 break;
    275             }
    276             --length;
    277             c=*s++;
    278         }
    279 
    280         /*
    281          * no assertions here because these functions are legitimately called
    282          * for strings with variant characters
    283          */
    284         if(!UCHAR_IS_INVARIANT(c)) {
    285             return FALSE; /* found a variant char */
    286         }
    287     }
    288     return TRUE;
    289 }
    290 
    291 /* UDataSwapFn implementations used in udataswp.c ------- */
    292 
    293 /* convert ASCII to EBCDIC and verify that all characters are invariant */
    294 U_CAPI int32_t U_EXPORT2
    295 uprv_ebcdicFromAscii(const UDataSwapper *ds,
    296                      const void *inData, int32_t length, void *outData,
    297                      UErrorCode *pErrorCode) {
    298     const uint8_t *s;
    299     uint8_t *t;
    300     uint8_t c;
    301 
    302     int32_t count;
    303 
    304     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    305         return 0;
    306     }
    307     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
    308         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    309         return 0;
    310     }
    311 
    312     /* setup and swapping */
    313     s=(const uint8_t *)inData;
    314     t=(uint8_t *)outData;
    315     count=length;
    316     while(count>0) {
    317         c=*s++;
    318         if(!UCHAR_IS_INVARIANT(c)) {
    319             udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
    320                              length, length-count);
    321             *pErrorCode=U_INVALID_CHAR_FOUND;
    322             return 0;
    323         }
    324         *t++=ebcdicFromAscii[c];
    325         --count;
    326     }
    327 
    328     return length;
    329 }
    330 
    331 /* this function only checks and copies ASCII strings without conversion */
    332 U_CFUNC int32_t
    333 uprv_copyAscii(const UDataSwapper *ds,
    334                const void *inData, int32_t length, void *outData,
    335                UErrorCode *pErrorCode) {
    336     const uint8_t *s;
    337     uint8_t c;
    338 
    339     int32_t count;
    340 
    341     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    342         return 0;
    343     }
    344     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
    345         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    346         return 0;
    347     }
    348 
    349     /* setup and checking */
    350     s=(const uint8_t *)inData;
    351     count=length;
    352     while(count>0) {
    353         c=*s++;
    354         if(!UCHAR_IS_INVARIANT(c)) {
    355             udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
    356                              length, length-count);
    357             *pErrorCode=U_INVALID_CHAR_FOUND;
    358             return 0;
    359         }
    360         --count;
    361     }
    362 
    363     if(length>0 && inData!=outData) {
    364         uprv_memcpy(outData, inData, length);
    365     }
    366 
    367     return length;
    368 }
    369 
    370 /* convert EBCDIC to ASCII and verify that all characters are invariant */
    371 U_CFUNC int32_t
    372 uprv_asciiFromEbcdic(const UDataSwapper *ds,
    373                      const void *inData, int32_t length, void *outData,
    374                      UErrorCode *pErrorCode) {
    375     const uint8_t *s;
    376     uint8_t *t;
    377     uint8_t c;
    378 
    379     int32_t count;
    380 
    381     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    382         return 0;
    383     }
    384     if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
    385         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    386         return 0;
    387     }
    388 
    389     /* setup and swapping */
    390     s=(const uint8_t *)inData;
    391     t=(uint8_t *)outData;
    392     count=length;
    393     while(count>0) {
    394         c=*s++;
    395         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
    396             udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
    397                              length, length-count);
    398             *pErrorCode=U_INVALID_CHAR_FOUND;
    399             return 0;
    400         }
    401         *t++=c;
    402         --count;
    403     }
    404 
    405     return length;
    406 }
    407 
    408 /* this function only checks and copies EBCDIC strings without conversion */
    409 U_CFUNC int32_t
    410 uprv_copyEbcdic(const UDataSwapper *ds,
    411                 const void *inData, int32_t length, void *outData,
    412                 UErrorCode *pErrorCode) {
    413     const uint8_t *s;
    414     uint8_t c;
    415 
    416     int32_t count;
    417 
    418     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    419         return 0;
    420     }
    421     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
    422         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    423         return 0;
    424     }
    425 
    426     /* setup and checking */
    427     s=(const uint8_t *)inData;
    428     count=length;
    429     while(count>0) {
    430         c=*s++;
    431         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
    432             udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
    433                              length, length-count);
    434             *pErrorCode=U_INVALID_CHAR_FOUND;
    435             return 0;
    436         }
    437         --count;
    438     }
    439 
    440     if(length>0 && inData!=outData) {
    441         uprv_memcpy(outData, inData, length);
    442     }
    443 
    444     return length;
    445 }
    446 
    447 /* compare invariant strings; variant characters compare less than others and unlike each other */
    448 U_CFUNC int32_t
    449 uprv_compareInvAscii(const UDataSwapper *ds,
    450                      const char *outString, int32_t outLength,
    451                      const UChar *localString, int32_t localLength) {
    452     int32_t minLength;
    453     UChar32 c1, c2;
    454     uint8_t c;
    455 
    456     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
    457         return 0;
    458     }
    459 
    460     if(outLength<0) {
    461         outLength=(int32_t)uprv_strlen(outString);
    462     }
    463     if(localLength<0) {
    464         localLength=u_strlen(localString);
    465     }
    466 
    467     minLength= outLength<localLength ? outLength : localLength;
    468 
    469     while(minLength>0) {
    470         c=(uint8_t)*outString++;
    471         if(UCHAR_IS_INVARIANT(c)) {
    472             c1=c;
    473         } else {
    474             c1=-1;
    475         }
    476 
    477         c2=*localString++;
    478         if(!UCHAR_IS_INVARIANT(c2)) {
    479             c2=-2;
    480         }
    481 
    482         if((c1-=c2)!=0) {
    483             return c1;
    484         }
    485 
    486         --minLength;
    487     }
    488 
    489     /* strings start with same prefix, compare lengths */
    490     return outLength-localLength;
    491 }
    492 
    493 U_CFUNC int32_t
    494 uprv_compareInvEbcdic(const UDataSwapper *ds,
    495                       const char *outString, int32_t outLength,
    496                       const UChar *localString, int32_t localLength) {
    497     int32_t minLength;
    498     UChar32 c1, c2;
    499     uint8_t c;
    500 
    501     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
    502         return 0;
    503     }
    504 
    505     if(outLength<0) {
    506         outLength=(int32_t)uprv_strlen(outString);
    507     }
    508     if(localLength<0) {
    509         localLength=u_strlen(localString);
    510     }
    511 
    512     minLength= outLength<localLength ? outLength : localLength;
    513 
    514     while(minLength>0) {
    515         c=(uint8_t)*outString++;
    516         if(c==0) {
    517             c1=0;
    518         } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
    519             /* c1 is set */
    520         } else {
    521             c1=-1;
    522         }
    523 
    524         c2=*localString++;
    525         if(!UCHAR_IS_INVARIANT(c2)) {
    526             c2=-2;
    527         }
    528 
    529         if((c1-=c2)!=0) {
    530             return c1;
    531         }
    532 
    533         --minLength;
    534     }
    535 
    536     /* strings start with same prefix, compare lengths */
    537     return outLength-localLength;
    538 }
    539 
    540 U_CAPI int32_t U_EXPORT2
    541 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    542     int32_t c1, c2;
    543 
    544     for(;; ++s1, ++s2) {
    545         c1=(uint8_t)*s1;
    546         c2=(uint8_t)*s2;
    547         if(c1!=c2) {
    548             if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
    549                 c1=-(int32_t)(uint8_t)*s1;
    550             }
    551             if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
    552                 c2=-(int32_t)(uint8_t)*s2;
    553             }
    554             return c1-c2;
    555         } else if(c1==0) {
    556             return 0;
    557         }
    558     }
    559 }
    560 
    561 U_CAPI char U_EXPORT2
    562 uprv_ebcdicToLowercaseAscii(char c) {
    563     return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
    564 }
    565 
    566 U_INTERNAL uint8_t* U_EXPORT2
    567 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
    568 {
    569   uint8_t *orig_dst = dst;
    570 
    571   if(n==-1) {
    572     n = uprv_strlen((const char*)src)+1; /* copy NUL */
    573   }
    574   /* copy non-null */
    575   while(*src && n>0) {
    576     *(dst++) = asciiFromEbcdic[*(src++)];
    577     n--;
    578   }
    579   /* pad */
    580   while(n>0) {
    581     *(dst++) = 0;
    582     n--;
    583   }
    584   return orig_dst;
    585 }
    586 
    587 U_INTERNAL uint8_t* U_EXPORT2
    588 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
    589 {
    590   uint8_t *orig_dst = dst;
    591 
    592   if(n==-1) {
    593     n = uprv_strlen((const char*)src)+1; /* copy NUL */
    594   }
    595   /* copy non-null */
    596   while(*src && n>0) {
    597     char ch = ebcdicFromAscii[*(src++)];
    598     if(ch == 0) {
    599       ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
    600     }
    601     *(dst++) = ch;
    602     n--;
    603   }
    604   /* pad */
    605   while(n>0) {
    606     *(dst++) = 0;
    607     n--;
    608   }
    609   return orig_dst;
    610 }
    611 
    612