Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ustr_wcs.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004sep07
     14 *   created by: Markus W. Scherer
     15 *
     16 *   u_strToWCS() and u_strFromWCS() functions
     17 *   moved here from ustrtrns.c for better modularization.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "cstring.h"
     23 #include "cwchar.h"
     24 #include "cmemory.h"
     25 #include "ustr_imp.h"
     26 #include "ustr_cnv.h"
     27 
     28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
     29 
     30 #define _STACK_BUFFER_CAPACITY 1000
     31 #define _BUFFER_CAPACITY_MULTIPLIER 2
     32 
     33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
     34 static U_INLINE UBool
     35 u_growAnyBufferFromStatic(void *context,
     36                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
     37                        int32_t length, int32_t size) {
     38 
     39     void *newBuffer=uprv_malloc(reqCapacity*size);
     40     if(newBuffer!=NULL) {
     41         if(length>0) {
     42             uprv_memcpy(newBuffer, *pBuffer, length*size);
     43         }
     44         *pCapacity=reqCapacity;
     45     } else {
     46         *pCapacity=0;
     47     }
     48 
     49     /* release the old pBuffer if it was not statically allocated */
     50     if(*pBuffer!=(void *)context) {
     51         uprv_free(*pBuffer);
     52     }
     53 
     54     *pBuffer=newBuffer;
     55     return (UBool)(newBuffer!=NULL);
     56 }
     57 
     58 /* helper function */
     59 static wchar_t*
     60 _strToWCS(wchar_t *dest,
     61            int32_t destCapacity,
     62            int32_t *pDestLength,
     63            const UChar *src,
     64            int32_t srcLength,
     65            UErrorCode *pErrorCode){
     66 
     67     char stackBuffer [_STACK_BUFFER_CAPACITY];
     68     char* tempBuf = stackBuffer;
     69     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
     70     char* tempBufLimit = stackBuffer + tempBufCapacity;
     71     UConverter* conv = NULL;
     72     char* saveBuf = tempBuf;
     73     wchar_t* intTarget=NULL;
     74     int32_t intTargetCapacity=0;
     75     int count=0,retVal=0;
     76 
     77     const UChar *pSrcLimit =NULL;
     78     const UChar *pSrc = src;
     79 
     80     conv = u_getDefaultConverter(pErrorCode);
     81 
     82     if(U_FAILURE(*pErrorCode)){
     83         return NULL;
     84     }
     85 
     86     if(srcLength == -1){
     87         srcLength = u_strlen(pSrc);
     88     }
     89 
     90     pSrcLimit = pSrc + srcLength;
     91 
     92     for(;;) {
     93         /* reset the error state */
     94         *pErrorCode = U_ZERO_ERROR;
     95 
     96         /* convert to chars using default converter */
     97         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
     98         count =(tempBuf - saveBuf);
     99 
    100         /* This should rarely occur */
    101         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    102             tempBuf = saveBuf;
    103 
    104             /* we dont have enough room on the stack grow the buffer */
    105             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    106                 (_BUFFER_CAPACITY_MULTIPLIER * (srcLength)), count,sizeof(char))){
    107                 goto cleanup;
    108             }
    109 
    110            saveBuf = tempBuf;
    111            tempBufLimit = tempBuf + tempBufCapacity;
    112            tempBuf = tempBuf + count;
    113 
    114         } else {
    115             break;
    116         }
    117     }
    118 
    119     if(U_FAILURE(*pErrorCode)){
    120         goto cleanup;
    121     }
    122 
    123     /* done with conversion null terminate the char buffer */
    124     if(count>=tempBufCapacity){
    125         tempBuf = saveBuf;
    126         /* we dont have enough room on the stack grow the buffer */
    127         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    128             tempBufCapacity-count+1, count,sizeof(char))){
    129             goto cleanup;
    130         }
    131        saveBuf = tempBuf;
    132     }
    133 
    134     saveBuf[count]=0;
    135 
    136 
    137     /* allocate more space than required
    138      * here we assume that every char requires
    139      * no more than 2 wchar_ts
    140      */
    141     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
    142     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
    143 
    144     if(intTarget){
    145 
    146         int32_t nulLen = 0;
    147         int32_t remaining = intTargetCapacity;
    148         wchar_t* pIntTarget=intTarget;
    149         tempBuf = saveBuf;
    150 
    151         /* now convert the mbs to wcs */
    152         for(;;){
    153 
    154             /* we can call the system API since we are sure that
    155              * there is atleast 1 null in the input
    156              */
    157             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
    158 
    159             if(retVal==-1){
    160                 *pErrorCode = U_INVALID_CHAR_FOUND;
    161                 break;
    162             }else if(retVal== remaining){/* should never occur */
    163                 int numWritten = (pIntTarget-intTarget);
    164                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
    165                                           &intTargetCapacity,
    166                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
    167                                           numWritten,
    168                                           sizeof(wchar_t));
    169                 pIntTarget = intTarget;
    170                 remaining=intTargetCapacity;
    171 
    172                 if(nulLen!=count){ /*there are embedded nulls*/
    173                     pIntTarget+=numWritten;
    174                     remaining-=numWritten;
    175                 }
    176 
    177             }else{
    178                 int32_t nulVal;
    179                 /*scan for nulls */
    180                 /* we donot check for limit since tempBuf is null terminated */
    181                 while(tempBuf[nulLen++] != 0){
    182                 }
    183                 nulVal = (nulLen < srcLength) ? 1 : 0;
    184                 pIntTarget = pIntTarget + retVal+nulVal;
    185                 remaining -=(retVal+nulVal);
    186 
    187                 /* check if we have reached the source limit*/
    188                 if(nulLen>=(count)){
    189                     break;
    190                 }
    191             }
    192         }
    193         count = (int32_t)(pIntTarget-intTarget);
    194 
    195         if(0 < count && count <= destCapacity){
    196             uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
    197         }
    198 
    199         if(pDestLength){
    200             *pDestLength = count;
    201         }
    202 
    203         /* free the allocated memory */
    204         uprv_free(intTarget);
    205 
    206     }else{
    207         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    208     }
    209 cleanup:
    210     /* are we still using stack buffer */
    211     if(stackBuffer != saveBuf){
    212         uprv_free(saveBuf);
    213     }
    214     u_terminateWChars(dest,destCapacity,count,pErrorCode);
    215 
    216     u_releaseDefaultConverter(conv);
    217 
    218     return dest;
    219 }
    220 #endif
    221 
    222 U_CAPI wchar_t* U_EXPORT2
    223 u_strToWCS(wchar_t *dest,
    224            int32_t destCapacity,
    225            int32_t *pDestLength,
    226            const UChar *src,
    227            int32_t srcLength,
    228            UErrorCode *pErrorCode){
    229 
    230     /* args check */
    231     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    232         return NULL;
    233     }
    234 
    235     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    236         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    237     ) {
    238         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    239         return NULL;
    240     }
    241 
    242 #ifdef U_WCHAR_IS_UTF16
    243     /* wchar_t is UTF-16 just do a memcpy */
    244     if(srcLength == -1){
    245         srcLength = u_strlen(src);
    246     }
    247     if(0 < srcLength && srcLength <= destCapacity){
    248         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    249     }
    250     if(pDestLength){
    251        *pDestLength = srcLength;
    252     }
    253 
    254     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    255 
    256     return dest;
    257 
    258 #elif defined U_WCHAR_IS_UTF32
    259 
    260     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
    261                                   src, srcLength, pErrorCode);
    262 
    263 #else
    264 
    265     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
    266 
    267 #endif
    268 
    269 }
    270 
    271 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    272 /* helper function */
    273 static UChar*
    274 _strFromWCS( UChar   *dest,
    275              int32_t destCapacity,
    276              int32_t *pDestLength,
    277              const wchar_t *src,
    278              int32_t srcLength,
    279              UErrorCode *pErrorCode)
    280 {
    281     int32_t retVal =0, count =0 ;
    282     UConverter* conv = NULL;
    283     UChar* pTarget = NULL;
    284     UChar* pTargetLimit = NULL;
    285     UChar* target = NULL;
    286 
    287     UChar uStack [_STACK_BUFFER_CAPACITY];
    288 
    289     wchar_t wStack[_STACK_BUFFER_CAPACITY];
    290     wchar_t* pWStack = wStack;
    291 
    292 
    293     char cStack[_STACK_BUFFER_CAPACITY];
    294     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
    295     char* pCSrc=cStack;
    296     char* pCSave=pCSrc;
    297     char* pCSrcLimit=NULL;
    298 
    299     const wchar_t* pSrc = src;
    300     const wchar_t* pSrcLimit = NULL;
    301 
    302     if(srcLength ==-1){
    303         /* if the wchar_t source is null terminated we can safely
    304          * assume that there are no embedded nulls, this is a fast
    305          * path for null terminated strings.
    306          */
    307         for(;;){
    308             /* convert wchars  to chars */
    309             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
    310 
    311             if(retVal == -1){
    312                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    313                 goto cleanup;
    314             }else if(retVal >= (cStackCap-1)){
    315                 /* Should rarely occur */
    316                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    317                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
    318                 pCSave = pCSrc;
    319             }else{
    320                 /* converted every thing */
    321                 pCSrc = pCSrc+retVal;
    322                 break;
    323             }
    324         }
    325 
    326     }else{
    327         /* here the source is not null terminated
    328          * so it may have nulls embeded and we need to
    329          * do some extra processing
    330          */
    331         int32_t remaining =cStackCap;
    332 
    333         pSrcLimit = src + srcLength;
    334 
    335         for(;;){
    336             register int32_t nulLen = 0;
    337 
    338             /* find nulls in the string */
    339             while(nulLen<srcLength && pSrc[nulLen++]!=0){
    340             }
    341 
    342             if((pSrc+nulLen) < pSrcLimit){
    343                 /* check if we have enough room in pCSrc */
    344                 if(remaining < (nulLen * MB_CUR_MAX)){
    345                     /* should rarely occur */
    346                     int32_t len = (pCSrc-pCSave);
    347                     pCSrc = pCSave;
    348                     /* we do not have enough room so grow the buffer*/
    349                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    350                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    351 
    352                     pCSave = pCSrc;
    353                     pCSrc = pCSave+len;
    354                     remaining = cStackCap-(pCSrc - pCSave);
    355                 }
    356 
    357                 /* we have found a null  so convert the
    358                  * chunk from begining of non-null char to null
    359                  */
    360                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
    361 
    362                 if(retVal==-1){
    363                     /* an error occurred bail out */
    364                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    365                     goto cleanup;
    366                 }
    367 
    368                 pCSrc += retVal+1 /* already null terminated */;
    369 
    370                 pSrc += nulLen; /* skip past the null */
    371                 srcLength-=nulLen; /* decrement the srcLength */
    372                 remaining -= (pCSrc-pCSave);
    373 
    374 
    375             }else{
    376                 /* the source is not null terminated and we are
    377                  * end of source so we copy the source to a temp buffer
    378                  * null terminate it and convert wchar_ts to chars
    379                  */
    380                 if(nulLen >= _STACK_BUFFER_CAPACITY){
    381                     /* Should rarely occcur */
    382                     /* allocate new buffer buffer */
    383                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
    384                     if(pWStack==NULL){
    385                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    386                         goto cleanup;
    387                     }
    388                 }
    389                 if(nulLen>0){
    390                     /* copy the contents to tempStack */
    391                     uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
    392                 }
    393 
    394                 /* null terminate the tempBuffer */
    395                 pWStack[nulLen] =0 ;
    396 
    397                 if(remaining < (nulLen * MB_CUR_MAX)){
    398                     /* Should rarely occur */
    399                     int32_t len = (pCSrc-pCSave);
    400                     pCSrc = pCSave;
    401                     /* we do not have enough room so grow the buffer*/
    402                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    403                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    404 
    405                     pCSave = pCSrc;
    406                     pCSrc = pCSave+len;
    407                     remaining = cStackCap-(pCSrc - pCSave);
    408                 }
    409                 /* convert to chars */
    410                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
    411 
    412                 pCSrc += retVal;
    413                 pSrc  += nulLen;
    414                 srcLength-=nulLen; /* decrement the srcLength */
    415                 break;
    416             }
    417         }
    418     }
    419 
    420     /* OK..now we have converted from wchar_ts to chars now
    421      * convert chars to UChars
    422      */
    423     pCSrcLimit = pCSrc;
    424     pCSrc = pCSave;
    425     pTarget = target= dest;
    426     pTargetLimit = dest + destCapacity;
    427 
    428     conv= u_getDefaultConverter(pErrorCode);
    429 
    430     if(U_FAILURE(*pErrorCode)|| conv==NULL){
    431         goto cleanup;
    432     }
    433 
    434     for(;;) {
    435 
    436         *pErrorCode = U_ZERO_ERROR;
    437 
    438         /* convert to stack buffer*/
    439         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
    440 
    441         /* increment count to number written to stack */
    442         count+= pTarget - target;
    443 
    444         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    445             target = uStack;
    446             pTarget = uStack;
    447             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
    448         } else {
    449             break;
    450         }
    451 
    452     }
    453 
    454     if(pDestLength){
    455         *pDestLength =count;
    456     }
    457 
    458     u_terminateUChars(dest,destCapacity,count,pErrorCode);
    459 
    460 cleanup:
    461 
    462     if(cStack != pCSave){
    463         uprv_free(pCSave);
    464     }
    465 
    466     if(wStack != pWStack){
    467         uprv_free(pWStack);
    468     }
    469 
    470     u_releaseDefaultConverter(conv);
    471 
    472     return dest;
    473 }
    474 #endif
    475 
    476 U_CAPI UChar* U_EXPORT2
    477 u_strFromWCS(UChar   *dest,
    478              int32_t destCapacity,
    479              int32_t *pDestLength,
    480              const wchar_t *src,
    481              int32_t srcLength,
    482              UErrorCode *pErrorCode)
    483 {
    484 
    485     /* args check */
    486     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    487         return NULL;
    488     }
    489 
    490     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    491         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    492     ) {
    493         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    494         return NULL;
    495     }
    496 
    497 #ifdef U_WCHAR_IS_UTF16
    498     /* wchar_t is UTF-16 just do a memcpy */
    499     if(srcLength == -1){
    500         srcLength = u_strlen(src);
    501     }
    502     if(0 < srcLength && srcLength <= destCapacity){
    503         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    504     }
    505     if(pDestLength){
    506        *pDestLength = srcLength;
    507     }
    508 
    509     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    510 
    511     return dest;
    512 
    513 #elif defined U_WCHAR_IS_UTF32
    514 
    515     return u_strFromUTF32(dest, destCapacity, pDestLength,
    516                           (UChar32*)src, srcLength, pErrorCode);
    517 
    518 #else
    519 
    520     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
    521 
    522 #endif
    523 
    524 }
    525 
    526 #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
    527