Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2000-2006, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8  *  ucnv_cb.c:
      9  *  External APIs for the ICU's codeset conversion library
     10  *  Helena Shih
     11  *
     12  * Modification History:
     13  *
     14  *   Date        Name        Description
     15  *   7/28/2000   srl         Implementation
     16  */
     17 
     18 /**
     19  * @name Character Conversion C API
     20  *
     21  */
     22 
     23 #include "unicode/utypes.h"
     24 
     25 #if !UCONFIG_NO_CONVERSION
     26 
     27 #include "unicode/ucnv_cb.h"
     28 #include "ucnv_bld.h"
     29 #include "ucnv_cnv.h"
     30 #include "cmemory.h"
     31 
     32 /* need to update the offsets when the target moves. */
     33 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
     34 if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
     35 the same call stack if the complexity arises. */
     36 U_CAPI void  U_EXPORT2
     37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
     38                        const char* source,
     39                        int32_t length,
     40                        int32_t offsetIndex,
     41                        UErrorCode * err)
     42 {
     43     if(U_FAILURE(*err)) {
     44         return;
     45     }
     46 
     47     ucnv_fromUWriteBytes(
     48         args->converter,
     49         source, length,
     50         &args->target, args->targetLimit,
     51         &args->offsets, offsetIndex,
     52         err);
     53 }
     54 
     55 U_CAPI void  U_EXPORT2
     56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
     57                              const UChar** source,
     58                              const UChar*  sourceLimit,
     59                              int32_t offsetIndex,
     60                              UErrorCode * err)
     61 {
     62     /*
     63     This is a fun one.  Recursion can occur - we're basically going to
     64     just retry shoving data through the same converter. Note, if you got
     65     here through some kind of invalid sequence, you maybe should emit a
     66     reset sequence of some kind and/or call ucnv_reset().  Since this
     67     IS an actual conversion, take care that you've changed the callback
     68     or the data, or you'll get an infinite loop.
     69 
     70     Please set the err value to something reasonable before calling
     71     into this.
     72     */
     73 
     74     char *oldTarget;
     75 
     76     if(U_FAILURE(*err))
     77     {
     78         return;
     79     }
     80 
     81     oldTarget = args->target;
     82 
     83     ucnv_fromUnicode(args->converter,
     84         &args->target,
     85         args->targetLimit,
     86         source,
     87         sourceLimit,
     88         NULL, /* no offsets */
     89         FALSE, /* no flush */
     90         err);
     91 
     92     if(args->offsets)
     93     {
     94         while (args->target != oldTarget)  /* if it moved at all.. */
     95         {
     96             *(args->offsets)++ = offsetIndex;
     97             oldTarget++;
     98         }
     99     }
    100 
    101     /*
    102     Note, if you did something like used a Stop subcallback, things would get interesting.
    103     In fact, here's where we want to return the partially consumed in-source!
    104     */
    105     if(*err == U_BUFFER_OVERFLOW_ERROR)
    106     /* && (*source < sourceLimit && args->target >= args->targetLimit)
    107     -- S. Hrcek */
    108     {
    109         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
    110         It's a fixed size. If we overflow it... Hmm */
    111         char *newTarget;
    112         const char *newTargetLimit;
    113         UErrorCode err2 = U_ZERO_ERROR;
    114 
    115         int8_t errBuffLen;
    116 
    117         errBuffLen  = args->converter->charErrorBufferLength;
    118 
    119         /* start the new target at the first free slot in the errbuff.. */
    120         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
    121 
    122         newTargetLimit = (char *)(args->converter->charErrorBuffer +
    123             sizeof(args->converter->charErrorBuffer));
    124 
    125         if(newTarget >= newTargetLimit)
    126         {
    127             *err = U_INTERNAL_PROGRAM_ERROR;
    128             return;
    129         }
    130 
    131         /* We're going to tell the converter that the errbuff len is empty.
    132         This prevents the existing errbuff from being 'flushed' out onto
    133         itself.  If the errbuff is needed by the converter this time,
    134         we're hosed - we're out of space! */
    135 
    136         args->converter->charErrorBufferLength = 0;
    137 
    138         ucnv_fromUnicode(args->converter,
    139                          &newTarget,
    140                          newTargetLimit,
    141                          source,
    142                          sourceLimit,
    143                          NULL,
    144                          FALSE,
    145                          &err2);
    146 
    147         /* We can go ahead and overwrite the  length here. We know just how
    148         to recalculate it. */
    149 
    150         args->converter->charErrorBufferLength = (int8_t)(
    151             newTarget - (char*)args->converter->charErrorBuffer);
    152 
    153         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
    154         {
    155             /* now we're REALLY in trouble.
    156             Internal program error - callback shouldn't have written this much
    157             data!
    158             */
    159             *err = U_INTERNAL_PROGRAM_ERROR;
    160             return;
    161         }
    162         /*else {*/
    163             /* sub errs could be invalid/truncated/illegal chars or w/e.
    164             These might want to be passed on up.. But the problem is, we already
    165             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
    166             other errs.. */
    167 
    168             /*
    169             if(U_FAILURE(err2))
    170             ??
    171             */
    172         /*}*/
    173     }
    174 }
    175 
    176 U_CAPI void  U_EXPORT2
    177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
    178                            int32_t offsetIndex,
    179                            UErrorCode * err)
    180 {
    181     UConverter *converter;
    182     int32_t length;
    183 
    184     if(U_FAILURE(*err)) {
    185         return;
    186     }
    187     converter = args->converter;
    188     length = converter->subCharLen;
    189 
    190     if(length == 0) {
    191         return;
    192     }
    193 
    194     if(length < 0) {
    195         /*
    196          * Write/convert the substitution string. Its real length is -length.
    197          * Unlike the escape callback, we need not change the converter's
    198          * callback function because ucnv_setSubstString() verified that
    199          * the string can be converted, so we will not get a conversion error
    200          * and will not recurse.
    201          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
    202          */
    203         const UChar *source = (const UChar *)converter->subChars;
    204         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
    205         return;
    206     }
    207 
    208     if(converter->sharedData->impl->writeSub!=NULL) {
    209         converter->sharedData->impl->writeSub(args, offsetIndex, err);
    210     }
    211     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
    212         /*
    213         TODO: Is this untestable because the MBCS converter has a writeSub function to call
    214         and the other converters don't use subChar1?
    215         */
    216         ucnv_cbFromUWriteBytes(args,
    217                                (const char *)&converter->subChar1, 1,
    218                                offsetIndex, err);
    219     }
    220     else {
    221         ucnv_cbFromUWriteBytes(args,
    222                                (const char *)converter->subChars, length,
    223                                offsetIndex, err);
    224     }
    225 }
    226 
    227 U_CAPI void  U_EXPORT2
    228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
    229                             const UChar* source,
    230                             int32_t length,
    231                             int32_t offsetIndex,
    232                             UErrorCode * err)
    233 {
    234     if(U_FAILURE(*err)) {
    235         return;
    236     }
    237 
    238     ucnv_toUWriteUChars(
    239         args->converter,
    240         source, length,
    241         &args->target, args->targetLimit,
    242         &args->offsets, offsetIndex,
    243         err);
    244 }
    245 
    246 U_CAPI void  U_EXPORT2
    247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
    248                          int32_t offsetIndex,
    249                        UErrorCode * err)
    250 {
    251     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
    252 
    253     /* could optimize this case, just one uchar */
    254     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
    255         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
    256     } else {
    257         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
    258     }
    259 }
    260 
    261 #endif
    262