1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2010-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * file name: ucnv_ct.c 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010Dec09 14 * created by: Michael Ow 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 20 21 #include "unicode/ucnv.h" 22 #include "unicode/uset.h" 23 #include "unicode/ucnv_err.h" 24 #include "unicode/ucnv_cb.h" 25 #include "unicode/utf16.h" 26 #include "ucnv_imp.h" 27 #include "ucnv_bld.h" 28 #include "ucnv_cnv.h" 29 #include "ucnvmbcs.h" 30 #include "cstring.h" 31 #include "cmemory.h" 32 33 typedef enum { 34 INVALID = -2, 35 DO_SEARCH = -1, 36 37 COMPOUND_TEXT_SINGLE_0 = 0, 38 COMPOUND_TEXT_SINGLE_1 = 1, 39 COMPOUND_TEXT_SINGLE_2 = 2, 40 COMPOUND_TEXT_SINGLE_3 = 3, 41 42 COMPOUND_TEXT_DOUBLE_1 = 4, 43 COMPOUND_TEXT_DOUBLE_2 = 5, 44 COMPOUND_TEXT_DOUBLE_3 = 6, 45 COMPOUND_TEXT_DOUBLE_4 = 7, 46 COMPOUND_TEXT_DOUBLE_5 = 8, 47 COMPOUND_TEXT_DOUBLE_6 = 9, 48 COMPOUND_TEXT_DOUBLE_7 = 10, 49 50 COMPOUND_TEXT_TRIPLE_DOUBLE = 11, 51 52 IBM_915 = 12, 53 IBM_916 = 13, 54 IBM_914 = 14, 55 IBM_874 = 15, 56 IBM_912 = 16, 57 IBM_913 = 17, 58 ISO_8859_14 = 18, 59 IBM_923 = 19, 60 NUM_OF_CONVERTERS = 20 61 } COMPOUND_TEXT_CONVERTERS; 62 63 #define SEARCH_LENGTH 12 64 65 static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { 66 /* Single */ 67 { 0x1B, 0x2D, 0x41, 0, 0 }, 68 { 0x1B, 0x2D, 0x4D, 0, 0 }, 69 { 0x1B, 0x2D, 0x46, 0, 0 }, 70 { 0x1B, 0x2D, 0x47, 0, 0 }, 71 72 /* Double */ 73 { 0x1B, 0x24, 0x29, 0x41, 0 }, 74 { 0x1B, 0x24, 0x29, 0x42, 0 }, 75 { 0x1B, 0x24, 0x29, 0x43, 0 }, 76 { 0x1B, 0x24, 0x29, 0x44, 0 }, 77 { 0x1B, 0x24, 0x29, 0x47, 0 }, 78 { 0x1B, 0x24, 0x29, 0x48, 0 }, 79 { 0x1B, 0x24, 0x29, 0x49, 0 }, 80 81 /* Triple/Double */ 82 { 0x1B, 0x25, 0x47, 0, 0 }, 83 84 /*IBM-915*/ 85 { 0x1B, 0x2D, 0x4C, 0, 0 }, 86 /*IBM-916*/ 87 { 0x1B, 0x2D, 0x48, 0, 0 }, 88 /*IBM-914*/ 89 { 0x1B, 0x2D, 0x44, 0, 0 }, 90 /*IBM-874*/ 91 { 0x1B, 0x2D, 0x54, 0, 0 }, 92 /*IBM-912*/ 93 { 0x1B, 0x2D, 0x42, 0, 0 }, 94 /* IBM-913 */ 95 { 0x1B, 0x2D, 0x43, 0, 0 }, 96 /* ISO-8859_14 */ 97 { 0x1B, 0x2D, 0x5F, 0, 0 }, 98 /* IBM-923 */ 99 { 0x1B, 0x2D, 0x62, 0, 0 }, 100 }; 101 102 #define ESC_START 0x1B 103 104 #define isASCIIRange(codepoint) \ 105 ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ 106 (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) 107 108 #define isIBM915(codepoint) \ 109 ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) 110 111 #define isIBM916(codepoint) \ 112 ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) 113 114 #define isCompoundS3(codepoint) \ 115 ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ 116 (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ 117 (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) 118 119 #define isCompoundS2(codepoint) \ 120 ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) 121 122 #define isIBM914(codepoint) \ 123 ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ 124 (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ 125 (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ 126 (codepoint >= 0x014A && codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ 127 (codepoint == 0x0172) || (codepoint == 0x0173)) 128 129 #define isIBM874(codepoint) \ 130 ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) 131 132 #define isIBM912(codepoint) \ 133 ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ 134 (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ 135 (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ 136 (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ 137 (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ 138 (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) 139 140 #define isIBM913(codepoint) \ 141 ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ 142 (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ 143 (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ 144 (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) 145 146 #define isCompoundS1(codepoint) \ 147 ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ 148 (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) 149 150 #define isISO8859_14(codepoint) \ 151 ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ 152 (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ 153 (codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \ 154 (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ 155 (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ 156 (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) 157 158 #define isIBM923(codepoint) \ 159 ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) 160 161 162 typedef struct{ 163 UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; 164 COMPOUND_TEXT_CONVERTERS state; 165 } UConverterDataCompoundText; 166 167 /*********** Compound Text Converter Protos ***********/ 168 U_CDECL_BEGIN 169 static void U_CALLCONV 170 _CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 171 172 static void U_CALLCONV 173 _CompoundTextClose(UConverter *converter); 174 175 static void U_CALLCONV 176 _CompoundTextReset(UConverter *converter, UConverterResetChoice choice); 177 178 static const char* U_CALLCONV 179 _CompoundTextgetName(const UConverter* cnv); 180 181 182 static int32_t findNextEsc(const char *source, const char *sourceLimit) { 183 int32_t length = sourceLimit - source; 184 int32_t i; 185 for (i = 1; i < length; i++) { 186 if (*(source + i) == 0x1B) { 187 return i; 188 } 189 } 190 191 return length; 192 } 193 194 static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { 195 COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; 196 197 if (isASCIIRange(codepoint)) { 198 state = COMPOUND_TEXT_SINGLE_0; 199 } else if (isIBM912(codepoint)) { 200 state = IBM_912; 201 }else if (isIBM913(codepoint)) { 202 state = IBM_913; 203 } else if (isISO8859_14(codepoint)) { 204 state = ISO_8859_14; 205 } else if (isIBM923(codepoint)) { 206 state = IBM_923; 207 } else if (isIBM874(codepoint)) { 208 state = IBM_874; 209 } else if (isIBM914(codepoint)) { 210 state = IBM_914; 211 } else if (isCompoundS2(codepoint)) { 212 state = COMPOUND_TEXT_SINGLE_2; 213 } else if (isCompoundS3(codepoint)) { 214 state = COMPOUND_TEXT_SINGLE_3; 215 } else if (isIBM916(codepoint)) { 216 state = IBM_916; 217 } else if (isIBM915(codepoint)) { 218 state = IBM_915; 219 } else if (isCompoundS1(codepoint)) { 220 state = COMPOUND_TEXT_SINGLE_1; 221 } 222 223 return state; 224 } 225 226 static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { 227 COMPOUND_TEXT_CONVERTERS state = INVALID; 228 UBool matchFound = FALSE; 229 int32_t i, n, offset = toUBytesBufferLength; 230 231 for (i = 0; i < NUM_OF_CONVERTERS; i++) { 232 matchFound = TRUE; 233 for (n = 0; escSeqCompoundText[i][n] != 0; n++) { 234 if (n < toUBytesBufferLength) { 235 if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { 236 matchFound = FALSE; 237 break; 238 } 239 } else if ((source + (n - offset)) >= sourceLimit) { 240 *err = U_TRUNCATED_CHAR_FOUND; 241 matchFound = FALSE; 242 break; 243 } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { 244 matchFound = FALSE; 245 break; 246 } 247 } 248 249 if (matchFound) { 250 break; 251 } 252 } 253 254 if (matchFound) { 255 state = (COMPOUND_TEXT_CONVERTERS)i; 256 } 257 258 return state; 259 } 260 261 static void U_CALLCONV 262 _CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ 263 cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); 264 if (cnv->extraInfo != NULL) { 265 UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; 266 267 UConverterNamePieces stackPieces; 268 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 269 270 myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; 271 myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); 272 myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); 273 myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); 274 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode); 275 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); 276 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); 277 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); 278 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); 279 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); 280 myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); 281 myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); 282 283 myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); 284 myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); 285 myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); 286 myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); 287 myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); 288 myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); 289 myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode); 290 myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); 291 292 if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { 293 _CompoundTextClose(cnv); 294 return; 295 } 296 297 myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; 298 } else { 299 *errorCode = U_MEMORY_ALLOCATION_ERROR; 300 } 301 } 302 303 304 static void U_CALLCONV 305 _CompoundTextClose(UConverter *converter) { 306 UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); 307 int32_t i; 308 309 if (converter->extraInfo != NULL) { 310 /*close the array of converter pointers and free the memory*/ 311 for (i = 0; i < NUM_OF_CONVERTERS; i++) { 312 if (myConverterData->myConverterArray[i] != NULL) { 313 ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); 314 } 315 } 316 317 uprv_free(converter->extraInfo); 318 converter->extraInfo = NULL; 319 } 320 } 321 322 static void U_CALLCONV 323 _CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { 324 (void)converter; 325 (void)choice; 326 } 327 328 static const char* U_CALLCONV 329 _CompoundTextgetName(const UConverter* cnv){ 330 (void)cnv; 331 return "x11-compound-text"; 332 } 333 334 static void U_CALLCONV 335 UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ 336 UConverter *cnv = args->converter; 337 uint8_t *target = (uint8_t *) args->target; 338 const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 339 const UChar* source = args->source; 340 const UChar* sourceLimit = args->sourceLimit; 341 /* int32_t* offsets = args->offsets; */ 342 UChar32 sourceChar; 343 UBool useFallback = cnv->useFallback; 344 uint8_t tmpTargetBuffer[7]; 345 int32_t tmpTargetBufferLength = 0; 346 COMPOUND_TEXT_CONVERTERS currentState, tmpState; 347 uint32_t pValue; 348 int32_t pValueLength = 0; 349 int32_t i, n, j; 350 351 UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; 352 353 currentState = myConverterData->state; 354 355 /* check if the last codepoint of previous buffer was a lead surrogate*/ 356 if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 357 goto getTrail; 358 } 359 360 while( source < sourceLimit){ 361 if(target < targetLimit){ 362 363 sourceChar = *(source++); 364 /*check if the char is a First surrogate*/ 365 if(U16_IS_SURROGATE(sourceChar)) { 366 if(U16_IS_SURROGATE_LEAD(sourceChar)) { 367 getTrail: 368 /*look ahead to find the trail surrogate*/ 369 if(source < sourceLimit) { 370 /* test the following code unit */ 371 UChar trail=(UChar) *source; 372 if(U16_IS_TRAIL(trail)) { 373 source++; 374 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 375 cnv->fromUChar32=0x00; 376 /* convert this supplementary code point */ 377 /* exit this condition tree */ 378 } else { 379 /* this is an unmatched lead code unit (1st surrogate) */ 380 /* callback(illegal) */ 381 *err=U_ILLEGAL_CHAR_FOUND; 382 cnv->fromUChar32=sourceChar; 383 break; 384 } 385 } else { 386 /* no more input */ 387 cnv->fromUChar32=sourceChar; 388 break; 389 } 390 } else { 391 /* this is an unmatched trail code unit (2nd surrogate) */ 392 /* callback(illegal) */ 393 *err=U_ILLEGAL_CHAR_FOUND; 394 cnv->fromUChar32=sourceChar; 395 break; 396 } 397 } 398 399 tmpTargetBufferLength = 0; 400 tmpState = getState(sourceChar); 401 402 if (tmpState != DO_SEARCH && currentState != tmpState) { 403 /* Get escape sequence if necessary */ 404 currentState = tmpState; 405 for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { 406 tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; 407 } 408 } 409 410 if (tmpState == DO_SEARCH) { 411 /* Test all available converters */ 412 for (i = 1; i < SEARCH_LENGTH; i++) { 413 pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); 414 if (pValueLength > 0) { 415 tmpState = (COMPOUND_TEXT_CONVERTERS)i; 416 if (currentState != tmpState) { 417 currentState = tmpState; 418 for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { 419 tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; 420 } 421 } 422 for (n = (pValueLength - 1); n >= 0; n--) { 423 tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); 424 } 425 break; 426 } 427 } 428 } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { 429 tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; 430 } else { 431 pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); 432 if (pValueLength > 0) { 433 for (n = (pValueLength - 1); n >= 0; n--) { 434 tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); 435 } 436 } 437 } 438 439 for (i = 0; i < tmpTargetBufferLength; i++) { 440 if (target < targetLimit) { 441 *target++ = tmpTargetBuffer[i]; 442 } else { 443 *err = U_BUFFER_OVERFLOW_ERROR; 444 break; 445 } 446 } 447 448 if (*err == U_BUFFER_OVERFLOW_ERROR) { 449 for (; i < tmpTargetBufferLength; i++) { 450 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; 451 } 452 } 453 } else { 454 *err = U_BUFFER_OVERFLOW_ERROR; 455 break; 456 } 457 } 458 459 /*save the state and return */ 460 myConverterData->state = currentState; 461 args->source = source; 462 args->target = (char*)target; 463 } 464 465 466 static void U_CALLCONV 467 UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, 468 UErrorCode* err){ 469 const char *mySource = (char *) args->source; 470 UChar *myTarget = args->target; 471 const char *mySourceLimit = args->sourceLimit; 472 const char *tmpSourceLimit = mySourceLimit; 473 uint32_t mySourceChar = 0x0000; 474 COMPOUND_TEXT_CONVERTERS currentState, tmpState; 475 int32_t sourceOffset = 0; 476 UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; 477 UConverterSharedData* savedSharedData = NULL; 478 479 UConverterToUnicodeArgs subArgs; 480 int32_t minArgsSize; 481 482 /* set up the subconverter arguments */ 483 if(args->size<sizeof(UConverterToUnicodeArgs)) { 484 minArgsSize = args->size; 485 } else { 486 minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); 487 } 488 489 uprv_memcpy(&subArgs, args, minArgsSize); 490 subArgs.size = (uint16_t)minArgsSize; 491 492 currentState = tmpState = myConverterData->state; 493 494 while(mySource < mySourceLimit){ 495 if(myTarget < args->targetLimit){ 496 if (args->converter->toULength > 0) { 497 mySourceChar = args->converter->toUBytes[0]; 498 } else { 499 mySourceChar = (uint8_t)*mySource; 500 } 501 502 if (mySourceChar == ESC_START) { 503 tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); 504 505 if (*err == U_TRUNCATED_CHAR_FOUND) { 506 for (; mySource < mySourceLimit;) { 507 args->converter->toUBytes[args->converter->toULength++] = *mySource++; 508 } 509 *err = U_ZERO_ERROR; 510 break; 511 } else if (tmpState == INVALID) { 512 if (args->converter->toULength == 0) { 513 mySource++; /* skip over the 0x1b byte */ 514 } 515 *err = U_ILLEGAL_CHAR_FOUND; 516 break; 517 } 518 519 if (tmpState != currentState) { 520 currentState = tmpState; 521 } 522 523 sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength); 524 525 mySource += sourceOffset; 526 527 args->converter->toULength = 0; 528 } 529 530 if (currentState == COMPOUND_TEXT_SINGLE_0) { 531 while (mySource < mySourceLimit) { 532 if (*mySource == ESC_START) { 533 break; 534 } 535 if (myTarget < args->targetLimit) { 536 *myTarget++ = 0x00ff&(*mySource++); 537 } else { 538 *err = U_BUFFER_OVERFLOW_ERROR; 539 break; 540 } 541 } 542 } else if (mySource < mySourceLimit){ 543 sourceOffset = findNextEsc(mySource, mySourceLimit); 544 545 tmpSourceLimit = mySource + sourceOffset; 546 547 subArgs.source = mySource; 548 subArgs.sourceLimit = tmpSourceLimit; 549 subArgs.target = myTarget; 550 savedSharedData = subArgs.converter->sharedData; 551 subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; 552 553 ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); 554 555 subArgs.converter->sharedData = savedSharedData; 556 557 mySource = subArgs.source; 558 myTarget = subArgs.target; 559 560 if (U_FAILURE(*err)) { 561 if(*err == U_BUFFER_OVERFLOW_ERROR) { 562 if(subArgs.converter->UCharErrorBufferLength > 0) { 563 uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, 564 subArgs.converter->UCharErrorBufferLength); 565 } 566 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; 567 subArgs.converter->UCharErrorBufferLength = 0; 568 } 569 break; 570 } 571 } 572 } else { 573 *err = U_BUFFER_OVERFLOW_ERROR; 574 break; 575 } 576 } 577 myConverterData->state = currentState; 578 args->target = myTarget; 579 args->source = mySource; 580 } 581 582 static void U_CALLCONV 583 _CompoundText_GetUnicodeSet(const UConverter *cnv, 584 const USetAdder *sa, 585 UConverterUnicodeSet which, 586 UErrorCode *pErrorCode) { 587 UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; 588 int32_t i; 589 590 for (i = 1; i < NUM_OF_CONVERTERS; i++) { 591 ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); 592 } 593 sa->add(sa->set, 0x0000); 594 sa->add(sa->set, 0x0009); 595 sa->add(sa->set, 0x000A); 596 sa->addRange(sa->set, 0x0020, 0x007F); 597 sa->addRange(sa->set, 0x00A0, 0x00FF); 598 } 599 U_CDECL_END 600 601 static const UConverterImpl _CompoundTextImpl = { 602 603 UCNV_COMPOUND_TEXT, 604 605 NULL, 606 NULL, 607 608 _CompoundTextOpen, 609 _CompoundTextClose, 610 _CompoundTextReset, 611 612 UConverter_toUnicode_CompoundText_OFFSETS, 613 UConverter_toUnicode_CompoundText_OFFSETS, 614 UConverter_fromUnicode_CompoundText_OFFSETS, 615 UConverter_fromUnicode_CompoundText_OFFSETS, 616 NULL, 617 618 NULL, 619 _CompoundTextgetName, 620 NULL, 621 NULL, 622 _CompoundText_GetUnicodeSet, 623 NULL, 624 NULL 625 }; 626 627 static const UConverterStaticData _CompoundTextStaticData = { 628 sizeof(UConverterStaticData), 629 "COMPOUND_TEXT", 630 0, 631 UCNV_IBM, 632 UCNV_COMPOUND_TEXT, 633 1, 634 6, 635 { 0xef, 0, 0, 0 }, 636 1, 637 FALSE, 638 FALSE, 639 0, 640 0, 641 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 642 }; 643 const UConverterSharedData _CompoundTextData = 644 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl); 645 646 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 647