1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uinvchar.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:2 12 * 13 * created on: 2004sep14 14 * created by: Markus W. Scherer 15 * 16 * Functions for handling invariant characters, moved here from putil.c 17 * for better modularization. 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/ustring.h" 22 #include "udataswp.h" 23 #include "cstring.h" 24 #include "cmemory.h" 25 #include "uassert.h" 26 #include "uinvchar.h" 27 28 /* invariant-character handling --------------------------------------------- */ 29 30 /* 31 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) 32 * appropriately for most EBCDIC codepages. 33 * 34 * They currently also map most other ASCII graphic characters, 35 * appropriately for codepages 37 and 1047. 36 * Exceptions: The characters for []^ have different codes in 37 & 1047. 37 * Both versions are mapped to ASCII. 38 * 39 * ASCII 37 1047 40 * [ 5B BA AD 41 * ] 5D BB BD 42 * ^ 5E B0 5F 43 * 44 * There are no mappings for variant characters from Unicode to EBCDIC. 45 * 46 * Currently, C0 control codes are also included in these maps. 47 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other 48 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), 49 * but there is no mapping for ASCII LF back to EBCDIC. 50 * 51 * ASCII EBCDIC S/390-OE 52 * LF 0A 25 15 53 * NEL 85 15 25 54 * 55 * The maps below explicitly exclude the variant 56 * control and graphical characters that are in ASCII-based 57 * codepages at 0x80 and above. 58 * "No mapping" is expressed by mapping to a 00 byte. 59 * 60 * These tables do not establish a converter or a codepage. 61 */ 62 63 static const uint8_t asciiFromEbcdic[256]={ 64 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 65 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, 66 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, 67 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, 68 69 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 70 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 71 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 73 74 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 75 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 76 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, 77 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, 78 79 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 80 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 81 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 82 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 83 }; 84 85 static const uint8_t ebcdicFromAscii[256]={ 86 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 87 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 88 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, 89 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, 90 91 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 92 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, 93 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 94 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, 95 96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100 101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 105 }; 106 107 /* Same as asciiFromEbcdic[] except maps all letters to lowercase. */ 108 static const uint8_t lowercaseAsciiFromEbcdic[256]={ 109 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 110 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, 111 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, 112 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, 113 114 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 115 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 116 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 117 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 118 119 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 120 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 121 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, 122 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, 123 124 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 125 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 126 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 127 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 128 }; 129 130 /* 131 * Bit sets indicating which characters of the ASCII repertoire 132 * (by ASCII/Unicode code) are "invariant". 133 * See utypes.h for more details. 134 * 135 * As invariant are considered the characters of the ASCII repertoire except 136 * for the following: 137 * 21 '!' <exclamation mark> 138 * 23 '#' <number sign> 139 * 24 '$' <dollar sign> 140 * 141 * 40 '@' <commercial at> 142 * 143 * 5b '[' <left bracket> 144 * 5c '\' <backslash> 145 * 5d ']' <right bracket> 146 * 5e '^' <circumflex> 147 * 148 * 60 '`' <grave accent> 149 * 150 * 7b '{' <left brace> 151 * 7c '|' <vertical line> 152 * 7d '}' <right brace> 153 * 7e '~' <tilde> 154 */ 155 static const uint32_t invariantChars[4]={ 156 0xfffffbff, /* 00..1f but not 0a */ 157 0xffffffe5, /* 20..3f but not 21 23 24 */ 158 0x87fffffe, /* 40..5f but not 40 5b..5e */ 159 0x87fffffe /* 60..7f but not 60 7b..7e */ 160 }; 161 162 /* 163 * test unsigned types (or values known to be non-negative) for invariant characters, 164 * tests ASCII-family character values 165 */ 166 #define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) 167 168 /* test signed types for invariant characters, adds test for positive values */ 169 #define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) 170 171 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 172 #define CHAR_TO_UCHAR(c) c 173 #define UCHAR_TO_CHAR(c) c 174 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 175 #define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] 176 #define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] 177 #else 178 # error U_CHARSET_FAMILY is not valid 179 #endif 180 181 182 U_CAPI void U_EXPORT2 183 u_charsToUChars(const char *cs, UChar *us, int32_t length) { 184 UChar u; 185 uint8_t c; 186 187 /* 188 * Allow the entire ASCII repertoire to be mapped _to_ Unicode. 189 * For EBCDIC systems, this works for characters with codes from 190 * codepages 37 and 1047 or compatible. 191 */ 192 while(length>0) { 193 c=(uint8_t)(*cs++); 194 u=(UChar)CHAR_TO_UCHAR(c); 195 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ 196 *us++=u; 197 --length; 198 } 199 } 200 201 U_CAPI void U_EXPORT2 202 u_UCharsToChars(const UChar *us, char *cs, int32_t length) { 203 UChar u; 204 205 while(length>0) { 206 u=*us++; 207 if(!UCHAR_IS_INVARIANT(u)) { 208 U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ 209 u=0; 210 } 211 *cs++=(char)UCHAR_TO_CHAR(u); 212 --length; 213 } 214 } 215 216 U_CAPI UBool U_EXPORT2 217 uprv_isInvariantString(const char *s, int32_t length) { 218 uint8_t c; 219 220 for(;;) { 221 if(length<0) { 222 /* NUL-terminated */ 223 c=(uint8_t)*s++; 224 if(c==0) { 225 break; 226 } 227 } else { 228 /* count length */ 229 if(length==0) { 230 break; 231 } 232 --length; 233 c=(uint8_t)*s++; 234 if(c==0) { 235 continue; /* NUL is invariant */ 236 } 237 } 238 /* c!=0 now, one branch below checks c==0 for variant characters */ 239 240 /* 241 * no assertions here because these functions are legitimately called 242 * for strings with variant characters 243 */ 244 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 245 if(!UCHAR_IS_INVARIANT(c)) { 246 return FALSE; /* found a variant char */ 247 } 248 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 249 c=CHAR_TO_UCHAR(c); 250 if(c==0 || !UCHAR_IS_INVARIANT(c)) { 251 return FALSE; /* found a variant char */ 252 } 253 #else 254 # error U_CHARSET_FAMILY is not valid 255 #endif 256 } 257 return TRUE; 258 } 259 260 U_CAPI UBool U_EXPORT2 261 uprv_isInvariantUString(const UChar *s, int32_t length) { 262 UChar c; 263 264 for(;;) { 265 if(length<0) { 266 /* NUL-terminated */ 267 c=*s++; 268 if(c==0) { 269 break; 270 } 271 } else { 272 /* count length */ 273 if(length==0) { 274 break; 275 } 276 --length; 277 c=*s++; 278 } 279 280 /* 281 * no assertions here because these functions are legitimately called 282 * for strings with variant characters 283 */ 284 if(!UCHAR_IS_INVARIANT(c)) { 285 return FALSE; /* found a variant char */ 286 } 287 } 288 return TRUE; 289 } 290 291 /* UDataSwapFn implementations used in udataswp.c ------- */ 292 293 /* convert ASCII to EBCDIC and verify that all characters are invariant */ 294 U_CAPI int32_t U_EXPORT2 295 uprv_ebcdicFromAscii(const UDataSwapper *ds, 296 const void *inData, int32_t length, void *outData, 297 UErrorCode *pErrorCode) { 298 const uint8_t *s; 299 uint8_t *t; 300 uint8_t c; 301 302 int32_t count; 303 304 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 305 return 0; 306 } 307 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 308 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 309 return 0; 310 } 311 312 /* setup and swapping */ 313 s=(const uint8_t *)inData; 314 t=(uint8_t *)outData; 315 count=length; 316 while(count>0) { 317 c=*s++; 318 if(!UCHAR_IS_INVARIANT(c)) { 319 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", 320 length, length-count); 321 *pErrorCode=U_INVALID_CHAR_FOUND; 322 return 0; 323 } 324 *t++=ebcdicFromAscii[c]; 325 --count; 326 } 327 328 return length; 329 } 330 331 /* this function only checks and copies ASCII strings without conversion */ 332 U_CFUNC int32_t 333 uprv_copyAscii(const UDataSwapper *ds, 334 const void *inData, int32_t length, void *outData, 335 UErrorCode *pErrorCode) { 336 const uint8_t *s; 337 uint8_t c; 338 339 int32_t count; 340 341 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 342 return 0; 343 } 344 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 345 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 346 return 0; 347 } 348 349 /* setup and checking */ 350 s=(const uint8_t *)inData; 351 count=length; 352 while(count>0) { 353 c=*s++; 354 if(!UCHAR_IS_INVARIANT(c)) { 355 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", 356 length, length-count); 357 *pErrorCode=U_INVALID_CHAR_FOUND; 358 return 0; 359 } 360 --count; 361 } 362 363 if(length>0 && inData!=outData) { 364 uprv_memcpy(outData, inData, length); 365 } 366 367 return length; 368 } 369 370 /* convert EBCDIC to ASCII and verify that all characters are invariant */ 371 U_CFUNC int32_t 372 uprv_asciiFromEbcdic(const UDataSwapper *ds, 373 const void *inData, int32_t length, void *outData, 374 UErrorCode *pErrorCode) { 375 const uint8_t *s; 376 uint8_t *t; 377 uint8_t c; 378 379 int32_t count; 380 381 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 382 return 0; 383 } 384 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 385 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 386 return 0; 387 } 388 389 /* setup and swapping */ 390 s=(const uint8_t *)inData; 391 t=(uint8_t *)outData; 392 count=length; 393 while(count>0) { 394 c=*s++; 395 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 396 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", 397 length, length-count); 398 *pErrorCode=U_INVALID_CHAR_FOUND; 399 return 0; 400 } 401 *t++=c; 402 --count; 403 } 404 405 return length; 406 } 407 408 /* this function only checks and copies EBCDIC strings without conversion */ 409 U_CFUNC int32_t 410 uprv_copyEbcdic(const UDataSwapper *ds, 411 const void *inData, int32_t length, void *outData, 412 UErrorCode *pErrorCode) { 413 const uint8_t *s; 414 uint8_t c; 415 416 int32_t count; 417 418 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 419 return 0; 420 } 421 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 422 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 423 return 0; 424 } 425 426 /* setup and checking */ 427 s=(const uint8_t *)inData; 428 count=length; 429 while(count>0) { 430 c=*s++; 431 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 432 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", 433 length, length-count); 434 *pErrorCode=U_INVALID_CHAR_FOUND; 435 return 0; 436 } 437 --count; 438 } 439 440 if(length>0 && inData!=outData) { 441 uprv_memcpy(outData, inData, length); 442 } 443 444 return length; 445 } 446 447 /* compare invariant strings; variant characters compare less than others and unlike each other */ 448 U_CFUNC int32_t 449 uprv_compareInvAscii(const UDataSwapper *ds, 450 const char *outString, int32_t outLength, 451 const UChar *localString, int32_t localLength) { 452 int32_t minLength; 453 UChar32 c1, c2; 454 uint8_t c; 455 456 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 457 return 0; 458 } 459 460 if(outLength<0) { 461 outLength=(int32_t)uprv_strlen(outString); 462 } 463 if(localLength<0) { 464 localLength=u_strlen(localString); 465 } 466 467 minLength= outLength<localLength ? outLength : localLength; 468 469 while(minLength>0) { 470 c=(uint8_t)*outString++; 471 if(UCHAR_IS_INVARIANT(c)) { 472 c1=c; 473 } else { 474 c1=-1; 475 } 476 477 c2=*localString++; 478 if(!UCHAR_IS_INVARIANT(c2)) { 479 c2=-2; 480 } 481 482 if((c1-=c2)!=0) { 483 return c1; 484 } 485 486 --minLength; 487 } 488 489 /* strings start with same prefix, compare lengths */ 490 return outLength-localLength; 491 } 492 493 U_CFUNC int32_t 494 uprv_compareInvEbcdic(const UDataSwapper *ds, 495 const char *outString, int32_t outLength, 496 const UChar *localString, int32_t localLength) { 497 int32_t minLength; 498 UChar32 c1, c2; 499 uint8_t c; 500 501 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 502 return 0; 503 } 504 505 if(outLength<0) { 506 outLength=(int32_t)uprv_strlen(outString); 507 } 508 if(localLength<0) { 509 localLength=u_strlen(localString); 510 } 511 512 minLength= outLength<localLength ? outLength : localLength; 513 514 while(minLength>0) { 515 c=(uint8_t)*outString++; 516 if(c==0) { 517 c1=0; 518 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { 519 /* c1 is set */ 520 } else { 521 c1=-1; 522 } 523 524 c2=*localString++; 525 if(!UCHAR_IS_INVARIANT(c2)) { 526 c2=-2; 527 } 528 529 if((c1-=c2)!=0) { 530 return c1; 531 } 532 533 --minLength; 534 } 535 536 /* strings start with same prefix, compare lengths */ 537 return outLength-localLength; 538 } 539 540 U_CAPI int32_t U_EXPORT2 541 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { 542 int32_t c1, c2; 543 544 for(;; ++s1, ++s2) { 545 c1=(uint8_t)*s1; 546 c2=(uint8_t)*s2; 547 if(c1!=c2) { 548 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { 549 c1=-(int32_t)(uint8_t)*s1; 550 } 551 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { 552 c2=-(int32_t)(uint8_t)*s2; 553 } 554 return c1-c2; 555 } else if(c1==0) { 556 return 0; 557 } 558 } 559 } 560 561 U_CAPI char U_EXPORT2 562 uprv_ebcdicToLowercaseAscii(char c) { 563 return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; 564 } 565 566 U_INTERNAL uint8_t* U_EXPORT2 567 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 568 { 569 uint8_t *orig_dst = dst; 570 571 if(n==-1) { 572 n = uprv_strlen((const char*)src)+1; /* copy NUL */ 573 } 574 /* copy non-null */ 575 while(*src && n>0) { 576 *(dst++) = asciiFromEbcdic[*(src++)]; 577 n--; 578 } 579 /* pad */ 580 while(n>0) { 581 *(dst++) = 0; 582 n--; 583 } 584 return orig_dst; 585 } 586 587 U_INTERNAL uint8_t* U_EXPORT2 588 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 589 { 590 uint8_t *orig_dst = dst; 591 592 if(n==-1) { 593 n = uprv_strlen((const char*)src)+1; /* copy NUL */ 594 } 595 /* copy non-null */ 596 while(*src && n>0) { 597 char ch = ebcdicFromAscii[*(src++)]; 598 if(ch == 0) { 599 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ 600 } 601 *(dst++) = ch; 602 n--; 603 } 604 /* pad */ 605 while(n>0) { 606 *(dst++) = 0; 607 n--; 608 } 609 return orig_dst; 610 } 611 612