Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ustr_wcs.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004sep07
     14 *   created by: Markus W. Scherer
     15 *
     16 *   u_strToWCS() and u_strFromWCS() functions
     17 *   moved here from ustrtrns.c for better modularization.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "cstring.h"
     23 #include "cwchar.h"
     24 #include "cmemory.h"
     25 #include "ustr_imp.h"
     26 #include "ustr_cnv.h"
     27 
     28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
     29 
     30 #define _STACK_BUFFER_CAPACITY 1000
     31 #define _BUFFER_CAPACITY_MULTIPLIER 2
     32 
     33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
     34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
     35 // Then we could change this to work only with wchar_t buffers.
     36 static inline UBool
     37 u_growAnyBufferFromStatic(void *context,
     38                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
     39                        int32_t length, int32_t size) {
     40     // Use char* not void* to avoid the compiler's strict-aliasing assumptions
     41     // and related warnings.
     42     char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
     43     if(newBuffer!=NULL) {
     44         if(length>0) {
     45             uprv_memcpy(newBuffer, *pBuffer, length*size);
     46         }
     47         *pCapacity=reqCapacity;
     48     } else {
     49         *pCapacity=0;
     50     }
     51 
     52     /* release the old pBuffer if it was not statically allocated */
     53     if(*pBuffer!=(char *)context) {
     54         uprv_free(*pBuffer);
     55     }
     56 
     57     *pBuffer=newBuffer;
     58     return (UBool)(newBuffer!=NULL);
     59 }
     60 
     61 /* helper function */
     62 static wchar_t*
     63 _strToWCS(wchar_t *dest,
     64            int32_t destCapacity,
     65            int32_t *pDestLength,
     66            const UChar *src,
     67            int32_t srcLength,
     68            UErrorCode *pErrorCode){
     69 
     70     char stackBuffer [_STACK_BUFFER_CAPACITY];
     71     char* tempBuf = stackBuffer;
     72     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
     73     char* tempBufLimit = stackBuffer + tempBufCapacity;
     74     UConverter* conv = NULL;
     75     char* saveBuf = tempBuf;
     76     wchar_t* intTarget=NULL;
     77     int32_t intTargetCapacity=0;
     78     int count=0,retVal=0;
     79 
     80     const UChar *pSrcLimit =NULL;
     81     const UChar *pSrc = src;
     82 
     83     conv = u_getDefaultConverter(pErrorCode);
     84 
     85     if(U_FAILURE(*pErrorCode)){
     86         return NULL;
     87     }
     88 
     89     if(srcLength == -1){
     90         srcLength = u_strlen(pSrc);
     91     }
     92 
     93     pSrcLimit = pSrc + srcLength;
     94 
     95     for(;;) {
     96         /* reset the error state */
     97         *pErrorCode = U_ZERO_ERROR;
     98 
     99         /* convert to chars using default converter */
    100         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
    101         count =(tempBuf - saveBuf);
    102 
    103         /* This should rarely occur */
    104         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    105             tempBuf = saveBuf;
    106 
    107             /* we dont have enough room on the stack grow the buffer */
    108             int32_t newCapacity = 2 * srcLength;
    109             if(newCapacity <= tempBufCapacity) {
    110                 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
    111             }
    112             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    113                     newCapacity, count, 1)) {
    114                 goto cleanup;
    115             }
    116 
    117            saveBuf = tempBuf;
    118            tempBufLimit = tempBuf + tempBufCapacity;
    119            tempBuf = tempBuf + count;
    120 
    121         } else {
    122             break;
    123         }
    124     }
    125 
    126     if(U_FAILURE(*pErrorCode)){
    127         goto cleanup;
    128     }
    129 
    130     /* done with conversion null terminate the char buffer */
    131     if(count>=tempBufCapacity){
    132         tempBuf = saveBuf;
    133         /* we dont have enough room on the stack grow the buffer */
    134         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    135                 count+1, count, 1)) {
    136             goto cleanup;
    137         }
    138        saveBuf = tempBuf;
    139     }
    140 
    141     saveBuf[count]=0;
    142 
    143 
    144     /* allocate more space than required
    145      * here we assume that every char requires
    146      * no more than 2 wchar_ts
    147      */
    148     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
    149     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
    150 
    151     if(intTarget){
    152 
    153         int32_t nulLen = 0;
    154         int32_t remaining = intTargetCapacity;
    155         wchar_t* pIntTarget=intTarget;
    156         tempBuf = saveBuf;
    157 
    158         /* now convert the mbs to wcs */
    159         for(;;){
    160 
    161             /* we can call the system API since we are sure that
    162              * there is atleast 1 null in the input
    163              */
    164             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
    165 
    166             if(retVal==-1){
    167                 *pErrorCode = U_INVALID_CHAR_FOUND;
    168                 break;
    169             }else if(retVal== remaining){/* should never occur */
    170                 int numWritten = (pIntTarget-intTarget);
    171                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
    172                                           &intTargetCapacity,
    173                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
    174                                           numWritten,
    175                                           sizeof(wchar_t));
    176                 pIntTarget = intTarget;
    177                 remaining=intTargetCapacity;
    178 
    179                 if(nulLen!=count){ /*there are embedded nulls*/
    180                     pIntTarget+=numWritten;
    181                     remaining-=numWritten;
    182                 }
    183 
    184             }else{
    185                 int32_t nulVal;
    186                 /*scan for nulls */
    187                 /* we donot check for limit since tempBuf is null terminated */
    188                 while(tempBuf[nulLen++] != 0){
    189                 }
    190                 nulVal = (nulLen < srcLength) ? 1 : 0;
    191                 pIntTarget = pIntTarget + retVal+nulVal;
    192                 remaining -=(retVal+nulVal);
    193 
    194                 /* check if we have reached the source limit*/
    195                 if(nulLen>=(count)){
    196                     break;
    197                 }
    198             }
    199         }
    200         count = (int32_t)(pIntTarget-intTarget);
    201 
    202         if(0 < count && count <= destCapacity){
    203             uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
    204         }
    205 
    206         if(pDestLength){
    207             *pDestLength = count;
    208         }
    209 
    210         /* free the allocated memory */
    211         uprv_free(intTarget);
    212 
    213     }else{
    214         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    215     }
    216 cleanup:
    217     /* are we still using stack buffer */
    218     if(stackBuffer != saveBuf){
    219         uprv_free(saveBuf);
    220     }
    221     u_terminateWChars(dest,destCapacity,count,pErrorCode);
    222 
    223     u_releaseDefaultConverter(conv);
    224 
    225     return dest;
    226 }
    227 #endif
    228 
    229 U_CAPI wchar_t* U_EXPORT2
    230 u_strToWCS(wchar_t *dest,
    231            int32_t destCapacity,
    232            int32_t *pDestLength,
    233            const UChar *src,
    234            int32_t srcLength,
    235            UErrorCode *pErrorCode){
    236 
    237     /* args check */
    238     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    239         return NULL;
    240     }
    241 
    242     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    243         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    244     ) {
    245         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    246         return NULL;
    247     }
    248 
    249 #ifdef U_WCHAR_IS_UTF16
    250     /* wchar_t is UTF-16 just do a memcpy */
    251     if(srcLength == -1){
    252         srcLength = u_strlen(src);
    253     }
    254     if(0 < srcLength && srcLength <= destCapacity){
    255         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    256     }
    257     if(pDestLength){
    258        *pDestLength = srcLength;
    259     }
    260 
    261     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    262 
    263     return dest;
    264 
    265 #elif defined U_WCHAR_IS_UTF32
    266 
    267     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
    268                                   src, srcLength, pErrorCode);
    269 
    270 #else
    271 
    272     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
    273 
    274 #endif
    275 
    276 }
    277 
    278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    279 /* helper function */
    280 static UChar*
    281 _strFromWCS( UChar   *dest,
    282              int32_t destCapacity,
    283              int32_t *pDestLength,
    284              const wchar_t *src,
    285              int32_t srcLength,
    286              UErrorCode *pErrorCode)
    287 {
    288     int32_t retVal =0, count =0 ;
    289     UConverter* conv = NULL;
    290     UChar* pTarget = NULL;
    291     UChar* pTargetLimit = NULL;
    292     UChar* target = NULL;
    293 
    294     UChar uStack [_STACK_BUFFER_CAPACITY];
    295 
    296     wchar_t wStack[_STACK_BUFFER_CAPACITY];
    297     wchar_t* pWStack = wStack;
    298 
    299 
    300     char cStack[_STACK_BUFFER_CAPACITY];
    301     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
    302     char* pCSrc=cStack;
    303     char* pCSave=pCSrc;
    304     char* pCSrcLimit=NULL;
    305 
    306     const wchar_t* pSrc = src;
    307     const wchar_t* pSrcLimit = NULL;
    308 
    309     if(srcLength ==-1){
    310         /* if the wchar_t source is null terminated we can safely
    311          * assume that there are no embedded nulls, this is a fast
    312          * path for null terminated strings.
    313          */
    314         for(;;){
    315             /* convert wchars  to chars */
    316             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
    317 
    318             if(retVal == -1){
    319                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    320                 goto cleanup;
    321             }else if(retVal >= (cStackCap-1)){
    322                 /* Should rarely occur */
    323                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    324                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
    325                 pCSave = pCSrc;
    326             }else{
    327                 /* converted every thing */
    328                 pCSrc = pCSrc+retVal;
    329                 break;
    330             }
    331         }
    332 
    333     }else{
    334         /* here the source is not null terminated
    335          * so it may have nulls embeded and we need to
    336          * do some extra processing
    337          */
    338         int32_t remaining =cStackCap;
    339 
    340         pSrcLimit = src + srcLength;
    341 
    342         for(;;){
    343             register int32_t nulLen = 0;
    344 
    345             /* find nulls in the string */
    346             while(nulLen<srcLength && pSrc[nulLen++]!=0){
    347             }
    348 
    349             if((pSrc+nulLen) < pSrcLimit){
    350                 /* check if we have enough room in pCSrc */
    351                 if(remaining < (nulLen * MB_CUR_MAX)){
    352                     /* should rarely occur */
    353                     int32_t len = (pCSrc-pCSave);
    354                     pCSrc = pCSave;
    355                     /* we do not have enough room so grow the buffer*/
    356                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    357                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    358 
    359                     pCSave = pCSrc;
    360                     pCSrc = pCSave+len;
    361                     remaining = cStackCap-(pCSrc - pCSave);
    362                 }
    363 
    364                 /* we have found a null  so convert the
    365                  * chunk from begining of non-null char to null
    366                  */
    367                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
    368 
    369                 if(retVal==-1){
    370                     /* an error occurred bail out */
    371                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    372                     goto cleanup;
    373                 }
    374 
    375                 pCSrc += retVal+1 /* already null terminated */;
    376 
    377                 pSrc += nulLen; /* skip past the null */
    378                 srcLength-=nulLen; /* decrement the srcLength */
    379                 remaining -= (pCSrc-pCSave);
    380 
    381 
    382             }else{
    383                 /* the source is not null terminated and we are
    384                  * end of source so we copy the source to a temp buffer
    385                  * null terminate it and convert wchar_ts to chars
    386                  */
    387                 if(nulLen >= _STACK_BUFFER_CAPACITY){
    388                     /* Should rarely occcur */
    389                     /* allocate new buffer buffer */
    390                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
    391                     if(pWStack==NULL){
    392                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    393                         goto cleanup;
    394                     }
    395                 }
    396                 if(nulLen>0){
    397                     /* copy the contents to tempStack */
    398                     uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
    399                 }
    400 
    401                 /* null terminate the tempBuffer */
    402                 pWStack[nulLen] =0 ;
    403 
    404                 if(remaining < (nulLen * MB_CUR_MAX)){
    405                     /* Should rarely occur */
    406                     int32_t len = (pCSrc-pCSave);
    407                     pCSrc = pCSave;
    408                     /* we do not have enough room so grow the buffer*/
    409                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    410                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    411 
    412                     pCSave = pCSrc;
    413                     pCSrc = pCSave+len;
    414                     remaining = cStackCap-(pCSrc - pCSave);
    415                 }
    416                 /* convert to chars */
    417                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
    418 
    419                 pCSrc += retVal;
    420                 pSrc  += nulLen;
    421                 srcLength-=nulLen; /* decrement the srcLength */
    422                 break;
    423             }
    424         }
    425     }
    426 
    427     /* OK..now we have converted from wchar_ts to chars now
    428      * convert chars to UChars
    429      */
    430     pCSrcLimit = pCSrc;
    431     pCSrc = pCSave;
    432     pTarget = target= dest;
    433     pTargetLimit = dest + destCapacity;
    434 
    435     conv= u_getDefaultConverter(pErrorCode);
    436 
    437     if(U_FAILURE(*pErrorCode)|| conv==NULL){
    438         goto cleanup;
    439     }
    440 
    441     for(;;) {
    442 
    443         *pErrorCode = U_ZERO_ERROR;
    444 
    445         /* convert to stack buffer*/
    446         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
    447 
    448         /* increment count to number written to stack */
    449         count+= pTarget - target;
    450 
    451         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    452             target = uStack;
    453             pTarget = uStack;
    454             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
    455         } else {
    456             break;
    457         }
    458 
    459     }
    460 
    461     if(pDestLength){
    462         *pDestLength =count;
    463     }
    464 
    465     u_terminateUChars(dest,destCapacity,count,pErrorCode);
    466 
    467 cleanup:
    468 
    469     if(cStack != pCSave){
    470         uprv_free(pCSave);
    471     }
    472 
    473     if(wStack != pWStack){
    474         uprv_free(pWStack);
    475     }
    476 
    477     u_releaseDefaultConverter(conv);
    478 
    479     return dest;
    480 }
    481 #endif
    482 
    483 U_CAPI UChar* U_EXPORT2
    484 u_strFromWCS(UChar   *dest,
    485              int32_t destCapacity,
    486              int32_t *pDestLength,
    487              const wchar_t *src,
    488              int32_t srcLength,
    489              UErrorCode *pErrorCode)
    490 {
    491 
    492     /* args check */
    493     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    494         return NULL;
    495     }
    496 
    497     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    498         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    499     ) {
    500         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    501         return NULL;
    502     }
    503 
    504 #ifdef U_WCHAR_IS_UTF16
    505     /* wchar_t is UTF-16 just do a memcpy */
    506     if(srcLength == -1){
    507         srcLength = u_strlen(src);
    508     }
    509     if(0 < srcLength && srcLength <= destCapacity){
    510         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    511     }
    512     if(pDestLength){
    513        *pDestLength = srcLength;
    514     }
    515 
    516     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    517 
    518     return dest;
    519 
    520 #elif defined U_WCHAR_IS_UTF32
    521 
    522     return u_strFromUTF32(dest, destCapacity, pDestLength,
    523                           (UChar32*)src, srcLength, pErrorCode);
    524 
    525 #else
    526 
    527     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
    528 
    529 #endif
    530 
    531 }
    532 
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
    534