Home | History | Annotate | Download | only in common
      1 /*
      2  *****************************************************************************
      3  *
      4  *   Copyright (C) 1998-2007, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *****************************************************************************
      8  *
      9  *  ucnv_err.c
     10  *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
     11  *
     12  *
     13 *   Change history:
     14 *
     15 *   06/29/2000  helena      Major rewrite of the callback APIs.
     16 */
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_CONVERSION
     21 
     22 #include "unicode/ucnv_err.h"
     23 #include "unicode/ucnv_cb.h"
     24 #include "ucnv_cnv.h"
     25 #include "cmemory.h"
     26 #include "unicode/ucnv.h"
     27 #include "ustrfmt.h"
     28 
     /*
      * Capacity, in UChars, of the scratch buffers that hold an escape sequence.
      * 48 = 6 (longest escape written for one source byte, e.g. "&#255;" or
      *         "&#xFF;")
      *    * 8 (max number of bytes per char for any converter)
      * The previous value of 32 assumed only 4 UChars per byte, which the XML
      * escape modes of the to-Unicode escape callback exceed.
      */
     #define VALUE_STRING_LENGTH 48

     /* Code points used to assemble the escape sequences. */
     #define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025  /* % */
     #define UNICODE_U_CODEPOINT             0x0055  /* U */
     #define UNICODE_X_CODEPOINT             0x0058  /* X */
     #define UNICODE_RS_CODEPOINT            0x005C  /* \ (reverse solidus) */
     #define UNICODE_U_LOW_CODEPOINT         0x0075  /* u */
     #define UNICODE_X_LOW_CODEPOINT         0x0078  /* x */
     #define UNICODE_AMP_CODEPOINT           0x0026  /* & */
     #define UNICODE_HASH_CODEPOINT          0x0023  /* # */
     #define UNICODE_SEMICOLON_CODEPOINT     0x003B  /* ; */
     #define UNICODE_PLUS_CODEPOINT          0x002B  /* + */
     #define UNICODE_LEFT_CURLY_CODEPOINT    0x007B  /* { */
     #define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D  /* } */
     #define UNICODE_SPACE_CODEPOINT         0x0020  /* space */

     /*
      * Values compared against the first char of a callback's context pointer
      * to select the escape format (or, for 'i', the stop-on-illegal option).
      */
     #define UCNV_PRV_ESCAPE_ICU         0
     #define UCNV_PRV_ESCAPE_C           'C'
     #define UCNV_PRV_ESCAPE_XML_DEC     'D'
     #define UCNV_PRV_ESCAPE_XML_HEX     'X'
     #define UCNV_PRV_ESCAPE_JAVA        'J'
     #define UCNV_PRV_ESCAPE_UNICODE     'U'
     #define UCNV_PRV_ESCAPE_CSS2        'S'
     #define UCNV_PRV_STOP_ON_ILLEGAL    'i'
     52 
     53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
     54 U_CAPI void    U_EXPORT2
     55 UCNV_FROM_U_CALLBACK_STOP (
     56                   const void *context,
     57                   UConverterFromUnicodeArgs *fromUArgs,
     58                   const UChar* codeUnits,
     59                   int32_t length,
     60                   UChar32 codePoint,
     61                   UConverterCallbackReason reason,
     62                   UErrorCode * err)
     63 {
     64     /* the caller must have set the error code accordingly */
     65     return;
     66 }
     67 
     68 
     69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
     70 U_CAPI void    U_EXPORT2
     71 UCNV_TO_U_CALLBACK_STOP (
     72                    const void *context,
     73                    UConverterToUnicodeArgs *toUArgs,
     74                    const char* codePoints,
     75                    int32_t length,
     76                    UConverterCallbackReason reason,
     77                    UErrorCode * err)
     78 {
     79     /* the caller must have set the error code accordingly */
     80     return;
     81 }
     82 
     83 U_CAPI void    U_EXPORT2
     84 UCNV_FROM_U_CALLBACK_SKIP (
     85                   const void *context,
     86                   UConverterFromUnicodeArgs *fromUArgs,
     87                   const UChar* codeUnits,
     88                   int32_t length,
     89                   UChar32 codePoint,
     90                   UConverterCallbackReason reason,
     91                   UErrorCode * err)
     92 {
     93     if (reason <= UCNV_IRREGULAR)
     94     {
     95         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
     96         {
     97             *err = U_ZERO_ERROR;
     98         }
     99         /* else the caller must have set the error code accordingly. */
    100     }
    101     /* else ignore the reset, close and clone calls. */
    102 }
    103 
    104 U_CAPI void    U_EXPORT2
    105 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
    106                   const void *context,
    107                   UConverterFromUnicodeArgs *fromArgs,
    108                   const UChar* codeUnits,
    109                   int32_t length,
    110                   UChar32 codePoint,
    111                   UConverterCallbackReason reason,
    112                   UErrorCode * err)
    113 {
    114     if (reason <= UCNV_IRREGULAR)
    115     {
    116         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
    117         {
    118             *err = U_ZERO_ERROR;
    119             ucnv_cbFromUWriteSub(fromArgs, 0, err);
    120         }
    121         /* else the caller must have set the error code accordingly. */
    122     }
    123     /* else ignore the reset, close and clone calls. */
    124 }
    125 
    126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
    127  *uses a clean copy (resetted) of the converter, to convert that unicode
    128  *escape sequence to the target codepage (if conversion failure happens then
    129  *we revert to substituting with subchar)
    130  */
    131 U_CAPI void    U_EXPORT2
    132 UCNV_FROM_U_CALLBACK_ESCAPE (
    133                          const void *context,
    134                          UConverterFromUnicodeArgs *fromArgs,
    135                          const UChar *codeUnits,
    136                          int32_t length,
    137                          UChar32 codePoint,
    138                          UConverterCallbackReason reason,
    139                          UErrorCode * err)
    140 {
    141 
    142   UChar valueString[VALUE_STRING_LENGTH];
    143   int32_t valueStringLength = 0;
    144   int32_t i = 0;
    145 
    146   const UChar *myValueSource = NULL;
    147   UErrorCode err2 = U_ZERO_ERROR;
    148   UConverterFromUCallback original = NULL;
    149   const void *originalContext;
    150 
    151   UConverterFromUCallback ignoredCallback = NULL;
    152   const void *ignoredContext;
    153 
    154   if (reason > UCNV_IRREGULAR)
    155   {
    156       return;
    157   }
    158 
    159   ucnv_setFromUCallBack (fromArgs->converter,
    160                      (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
    161                      NULL,
    162                      &original,
    163                      &originalContext,
    164                      &err2);
    165 
    166   if (U_FAILURE (err2))
    167   {
    168     *err = err2;
    169     return;
    170   }
    171   if(context==NULL)
    172   {
    173       while (i < length)
    174       {
    175         valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
    176         valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
    177         valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
    178       }
    179   }
    180   else
    181   {
    182       switch(*((char*)context))
    183       {
    184       case UCNV_PRV_ESCAPE_JAVA:
    185           while (i < length)
    186           {
    187               valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
    188               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
    189               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
    190           }
    191           break;
    192 
    193       case UCNV_PRV_ESCAPE_C:
    194           valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
    195 
    196           if(length==2){
    197               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
    198               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
    199 
    200           }
    201           else{
    202               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
    203               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
    204           }
    205           break;
    206 
    207       case UCNV_PRV_ESCAPE_XML_DEC:
    208 
    209           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
    210           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
    211           if(length==2){
    212               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
    213           }
    214           else{
    215               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
    216           }
    217           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
    218           break;
    219 
    220       case UCNV_PRV_ESCAPE_XML_HEX:
    221 
    222           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
    223           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
    224           valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
    225           if(length==2){
    226               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
    227           }
    228           else{
    229               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
    230           }
    231           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
    232           break;
    233 
    234       case UCNV_PRV_ESCAPE_UNICODE:
    235           valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
    236           valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
    237           valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
    238           if (length == 2) {
    239               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
    240           } else {
    241               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
    242           }
    243           valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
    244           break;
    245 
    246       case UCNV_PRV_ESCAPE_CSS2:
    247           valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
    248           valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
    249           /* Always add space character, becase the next character might be whitespace,
    250              which would erroneously be considered the termination of the escape sequence. */
    251           valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
    252           break;
    253 
    254       default:
    255           while (i < length)
    256           {
    257               valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
    258               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
    259               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
    260           }
    261       }
    262   }
    263   myValueSource = valueString;
    264 
    265   /* reset the error */
    266   *err = U_ZERO_ERROR;
    267 
    268   ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
    269 
    270   ucnv_setFromUCallBack (fromArgs->converter,
    271                          original,
    272                          originalContext,
    273                          &ignoredCallback,
    274                          &ignoredContext,
    275                          &err2);
    276   if (U_FAILURE (err2))
    277   {
    278       *err = err2;
    279       return;
    280   }
    281 
    282   return;
    283 }
    284 
    285 
    286 
    287 U_CAPI void  U_EXPORT2
    288 UCNV_TO_U_CALLBACK_SKIP (
    289                  const void *context,
    290                  UConverterToUnicodeArgs *toArgs,
    291                  const char* codeUnits,
    292                  int32_t length,
    293                  UConverterCallbackReason reason,
    294                  UErrorCode * err)
    295 {
    296     if (reason <= UCNV_IRREGULAR)
    297     {
    298         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
    299         {
    300             *err = U_ZERO_ERROR;
    301         }
    302         /* else the caller must have set the error code accordingly. */
    303     }
    304     /* else ignore the reset, close and clone calls. */
    305 }
    306 
    307 U_CAPI void    U_EXPORT2
    308 UCNV_TO_U_CALLBACK_SUBSTITUTE (
    309                  const void *context,
    310                  UConverterToUnicodeArgs *toArgs,
    311                  const char* codeUnits,
    312                  int32_t length,
    313                  UConverterCallbackReason reason,
    314                  UErrorCode * err)
    315 {
    316     if (reason <= UCNV_IRREGULAR)
    317     {
    318         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
    319         {
    320             *err = U_ZERO_ERROR;
    321             ucnv_cbToUWriteSub(toArgs,0,err);
    322         }
    323         /* else the caller must have set the error code accordingly. */
    324     }
    325     /* else ignore the reset, close and clone calls. */
    326 }
    327 
    328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
    329  *and uses that as the substitution sequence
    330  */
    331 U_CAPI void   U_EXPORT2
    332 UCNV_TO_U_CALLBACK_ESCAPE (
    333                  const void *context,
    334                  UConverterToUnicodeArgs *toArgs,
    335                  const char* codeUnits,
    336                  int32_t length,
    337                  UConverterCallbackReason reason,
    338                  UErrorCode * err)
    339 {
    340     UChar uniValueString[VALUE_STRING_LENGTH];
    341     int32_t valueStringLength = 0;
    342     int32_t i = 0;
    343 
    344     if (reason > UCNV_IRREGULAR)
    345     {
    346         return;
    347     }
    348 
    349     if(context==NULL)
    350     {
    351         while (i < length)
    352         {
    353             uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
    354             uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
    355             valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
    356         }
    357     }
    358     else
    359     {
    360         switch(*((char*)context))
    361         {
    362         case UCNV_PRV_ESCAPE_XML_DEC:
    363             while (i < length)
    364             {
    365                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
    366                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
    367                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
    368                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
    369             }
    370             break;
    371 
    372         case UCNV_PRV_ESCAPE_XML_HEX:
    373             while (i < length)
    374             {
    375                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
    376                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
    377                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
    378                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
    379                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
    380             }
    381             break;
    382         case UCNV_PRV_ESCAPE_C:
    383             while (i < length)
    384             {
    385                 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
    386                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
    387                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
    388             }
    389             break;
    390         default:
    391             while (i < length)
    392             {
    393                 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
    394                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
    395                 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
    396                 valueStringLength += 2;
    397             }
    398         }
    399     }
    400     /* reset the error */
    401     *err = U_ZERO_ERROR;
    402 
    403     ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
    404 }
    405 
    406 #endif
    407