Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2006, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6  *  ucnv_cb.c:
      7  *  External APIs for the ICU's codeset conversion library
      8  *  Helena Shih
      9  *
     10  * Modification History:
     11  *
     12  *   Date        Name        Description
     13  *   7/28/2000   srl         Implementation
     14  */
     15 
     16 /**
     17  * @name Character Conversion C API
     18  *
     19  */
     20 
     21 #include "unicode/utypes.h"
     22 
     23 #if !UCONFIG_NO_CONVERSION
     24 
     25 #include "unicode/ucnv_cb.h"
     26 #include "ucnv_bld.h"
     27 #include "ucnv_cnv.h"
     28 #include "cmemory.h"
     29 
     30 /* need to update the offsets when the target moves. */
     31 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
     32 if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
     33 the same call stack if the complexity arises. */
     34 U_CAPI void  U_EXPORT2
     35 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
     36                        const char* source,
     37                        int32_t length,
     38                        int32_t offsetIndex,
     39                        UErrorCode * err)
     40 {
     41     if(U_FAILURE(*err)) {
     42         return;
     43     }
     44 
     45     ucnv_fromUWriteBytes(
     46         args->converter,
     47         source, length,
     48         &args->target, args->targetLimit,
     49         &args->offsets, offsetIndex,
     50         err);
     51 }
     52 
     53 U_CAPI void  U_EXPORT2
     54 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
     55                              const UChar** source,
     56                              const UChar*  sourceLimit,
     57                              int32_t offsetIndex,
     58                              UErrorCode * err)
     59 {
     60     /*
     61     This is a fun one.  Recursion can occur - we're basically going to
     62     just retry shoving data through the same converter. Note, if you got
     63     here through some kind of invalid sequence, you maybe should emit a
     64     reset sequence of some kind and/or call ucnv_reset().  Since this
     65     IS an actual conversion, take care that you've changed the callback
     66     or the data, or you'll get an infinite loop.
     67 
     68     Please set the err value to something reasonable before calling
     69     into this.
     70     */
     71 
     72     char *oldTarget;
     73 
     74     if(U_FAILURE(*err))
     75     {
     76         return;
     77     }
     78 
     79     oldTarget = args->target;
     80 
     81     ucnv_fromUnicode(args->converter,
     82         &args->target,
     83         args->targetLimit,
     84         source,
     85         sourceLimit,
     86         NULL, /* no offsets */
     87         FALSE, /* no flush */
     88         err);
     89 
     90     if(args->offsets)
     91     {
     92         while (args->target != oldTarget)  /* if it moved at all.. */
     93         {
     94             *(args->offsets)++ = offsetIndex;
     95             oldTarget++;
     96         }
     97     }
     98 
     99     /*
    100     Note, if you did something like used a Stop subcallback, things would get interesting.
    101     In fact, here's where we want to return the partially consumed in-source!
    102     */
    103     if(*err == U_BUFFER_OVERFLOW_ERROR)
    104     /* && (*source < sourceLimit && args->target >= args->targetLimit)
    105     -- S. Hrcek */
    106     {
    107         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
    108         It's a fixed size. If we overflow it... Hmm */
    109         char *newTarget;
    110         const char *newTargetLimit;
    111         UErrorCode err2 = U_ZERO_ERROR;
    112 
    113         int8_t errBuffLen;
    114 
    115         errBuffLen  = args->converter->charErrorBufferLength;
    116 
    117         /* start the new target at the first free slot in the errbuff.. */
    118         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
    119 
    120         newTargetLimit = (char *)(args->converter->charErrorBuffer +
    121             sizeof(args->converter->charErrorBuffer));
    122 
    123         if(newTarget >= newTargetLimit)
    124         {
    125             *err = U_INTERNAL_PROGRAM_ERROR;
    126             return;
    127         }
    128 
    129         /* We're going to tell the converter that the errbuff len is empty.
    130         This prevents the existing errbuff from being 'flushed' out onto
    131         itself.  If the errbuff is needed by the converter this time,
    132         we're hosed - we're out of space! */
    133 
    134         args->converter->charErrorBufferLength = 0;
    135 
    136         ucnv_fromUnicode(args->converter,
    137                          &newTarget,
    138                          newTargetLimit,
    139                          source,
    140                          sourceLimit,
    141                          NULL,
    142                          FALSE,
    143                          &err2);
    144 
    145         /* We can go ahead and overwrite the  length here. We know just how
    146         to recalculate it. */
    147 
    148         args->converter->charErrorBufferLength = (int8_t)(
    149             newTarget - (char*)args->converter->charErrorBuffer);
    150 
    151         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
    152         {
    153             /* now we're REALLY in trouble.
    154             Internal program error - callback shouldn't have written this much
    155             data!
    156             */
    157             *err = U_INTERNAL_PROGRAM_ERROR;
    158             return;
    159         }
    160         /*else {*/
    161             /* sub errs could be invalid/truncated/illegal chars or w/e.
    162             These might want to be passed on up.. But the problem is, we already
    163             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
    164             other errs.. */
    165 
    166             /*
    167             if(U_FAILURE(err2))
    168             ??
    169             */
    170         /*}*/
    171     }
    172 }
    173 
    174 U_CAPI void  U_EXPORT2
    175 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
    176                            int32_t offsetIndex,
    177                            UErrorCode * err)
    178 {
    179     UConverter *converter;
    180     int32_t length;
    181 
    182     if(U_FAILURE(*err)) {
    183         return;
    184     }
    185     converter = args->converter;
    186     length = converter->subCharLen;
    187 
    188     if(length == 0) {
    189         return;
    190     }
    191 
    192     if(length < 0) {
    193         /*
    194          * Write/convert the substitution string. Its real length is -length.
    195          * Unlike the escape callback, we need not change the converter's
    196          * callback function because ucnv_setSubstString() verified that
    197          * the string can be converted, so we will not get a conversion error
    198          * and will not recurse.
    199          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
    200          */
    201         const UChar *source = (const UChar *)converter->subChars;
    202         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
    203         return;
    204     }
    205 
    206     if(converter->sharedData->impl->writeSub!=NULL) {
    207         converter->sharedData->impl->writeSub(args, offsetIndex, err);
    208     }
    209     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
    210         /*
    211         TODO: Is this untestable because the MBCS converter has a writeSub function to call
    212         and the other converters don't use subChar1?
    213         */
    214         ucnv_cbFromUWriteBytes(args,
    215                                (const char *)&converter->subChar1, 1,
    216                                offsetIndex, err);
    217     }
    218     else {
    219         ucnv_cbFromUWriteBytes(args,
    220                                (const char *)converter->subChars, length,
    221                                offsetIndex, err);
    222     }
    223 }
    224 
    225 U_CAPI void  U_EXPORT2
    226 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
    227                             const UChar* source,
    228                             int32_t length,
    229                             int32_t offsetIndex,
    230                             UErrorCode * err)
    231 {
    232     if(U_FAILURE(*err)) {
    233         return;
    234     }
    235 
    236     ucnv_toUWriteUChars(
    237         args->converter,
    238         source, length,
    239         &args->target, args->targetLimit,
    240         &args->offsets, offsetIndex,
    241         err);
    242 }
    243 
    244 U_CAPI void  U_EXPORT2
    245 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
    246                          int32_t offsetIndex,
    247                        UErrorCode * err)
    248 {
    249     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
    250 
    251     /* could optimize this case, just one uchar */
    252     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
    253         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
    254     } else {
    255         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
    256     }
    257 }
    258 
    259 #endif
    260