Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2001-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  ustr_wcs.cpp
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2004sep07
     16 *   created by: Markus W. Scherer
     17 *
     18 *   u_strToWCS() and u_strFromWCS() functions
     19 *   moved here from ustrtrns.c for better modularization.
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/ustring.h"
     24 #include "cstring.h"
     25 #include "cwchar.h"
     26 #include "cmemory.h"
     27 #include "ustr_imp.h"
     28 #include "ustr_cnv.h"
     29 
     30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
     31 
     32 #define _STACK_BUFFER_CAPACITY 1000
     33 #define _BUFFER_CAPACITY_MULTIPLIER 2
     34 
     35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
     36 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
     37 // Then we could change this to work only with wchar_t buffers.
     38 static inline UBool
     39 u_growAnyBufferFromStatic(void *context,
     40                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
     41                        int32_t length, int32_t size) {
     42     // Use char* not void* to avoid the compiler's strict-aliasing assumptions
     43     // and related warnings.
     44     char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
     45     if(newBuffer!=NULL) {
     46         if(length>0) {
     47             uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
     48         }
     49         *pCapacity=reqCapacity;
     50     } else {
     51         *pCapacity=0;
     52     }
     53 
     54     /* release the old pBuffer if it was not statically allocated */
     55     if(*pBuffer!=(char *)context) {
     56         uprv_free(*pBuffer);
     57     }
     58 
     59     *pBuffer=newBuffer;
     60     return (UBool)(newBuffer!=NULL);
     61 }
     62 
     63 /* helper function */
     64 static wchar_t*
     65 _strToWCS(wchar_t *dest,
     66            int32_t destCapacity,
     67            int32_t *pDestLength,
     68            const UChar *src,
     69            int32_t srcLength,
     70            UErrorCode *pErrorCode){
     71 
     72     char stackBuffer [_STACK_BUFFER_CAPACITY];
     73     char* tempBuf = stackBuffer;
     74     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
     75     char* tempBufLimit = stackBuffer + tempBufCapacity;
     76     UConverter* conv = NULL;
     77     char* saveBuf = tempBuf;
     78     wchar_t* intTarget=NULL;
     79     int32_t intTargetCapacity=0;
     80     int count=0,retVal=0;
     81 
     82     const UChar *pSrcLimit =NULL;
     83     const UChar *pSrc = src;
     84 
     85     conv = u_getDefaultConverter(pErrorCode);
     86 
     87     if(U_FAILURE(*pErrorCode)){
     88         return NULL;
     89     }
     90 
     91     if(srcLength == -1){
     92         srcLength = u_strlen(pSrc);
     93     }
     94 
     95     pSrcLimit = pSrc + srcLength;
     96 
     97     for(;;) {
     98         /* reset the error state */
     99         *pErrorCode = U_ZERO_ERROR;
    100 
    101         /* convert to chars using default converter */
    102         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
    103         count =(tempBuf - saveBuf);
    104 
    105         /* This should rarely occur */
    106         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    107             tempBuf = saveBuf;
    108 
    109             /* we dont have enough room on the stack grow the buffer */
    110             int32_t newCapacity = 2 * srcLength;
    111             if(newCapacity <= tempBufCapacity) {
    112                 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
    113             }
    114             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    115                     newCapacity, count, 1)) {
    116                 goto cleanup;
    117             }
    118 
    119            saveBuf = tempBuf;
    120            tempBufLimit = tempBuf + tempBufCapacity;
    121            tempBuf = tempBuf + count;
    122 
    123         } else {
    124             break;
    125         }
    126     }
    127 
    128     if(U_FAILURE(*pErrorCode)){
    129         goto cleanup;
    130     }
    131 
    132     /* done with conversion null terminate the char buffer */
    133     if(count>=tempBufCapacity){
    134         tempBuf = saveBuf;
    135         /* we dont have enough room on the stack grow the buffer */
    136         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
    137                 count+1, count, 1)) {
    138             goto cleanup;
    139         }
    140        saveBuf = tempBuf;
    141     }
    142 
    143     saveBuf[count]=0;
    144 
    145 
    146     /* allocate more space than required
    147      * here we assume that every char requires
    148      * no more than 2 wchar_ts
    149      */
    150     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
    151     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
    152 
    153     if(intTarget){
    154 
    155         int32_t nulLen = 0;
    156         int32_t remaining = intTargetCapacity;
    157         wchar_t* pIntTarget=intTarget;
    158         tempBuf = saveBuf;
    159 
    160         /* now convert the mbs to wcs */
    161         for(;;){
    162 
    163             /* we can call the system API since we are sure that
    164              * there is atleast 1 null in the input
    165              */
    166             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
    167 
    168             if(retVal==-1){
    169                 *pErrorCode = U_INVALID_CHAR_FOUND;
    170                 break;
    171             }else if(retVal== remaining){/* should never occur */
    172                 int numWritten = (pIntTarget-intTarget);
    173                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
    174                                           &intTargetCapacity,
    175                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
    176                                           numWritten,
    177                                           sizeof(wchar_t));
    178                 pIntTarget = intTarget;
    179                 remaining=intTargetCapacity;
    180 
    181                 if(nulLen!=count){ /*there are embedded nulls*/
    182                     pIntTarget+=numWritten;
    183                     remaining-=numWritten;
    184                 }
    185 
    186             }else{
    187                 int32_t nulVal;
    188                 /*scan for nulls */
    189                 /* we donot check for limit since tempBuf is null terminated */
    190                 while(tempBuf[nulLen++] != 0){
    191                 }
    192                 nulVal = (nulLen < srcLength) ? 1 : 0;
    193                 pIntTarget = pIntTarget + retVal+nulVal;
    194                 remaining -=(retVal+nulVal);
    195 
    196                 /* check if we have reached the source limit*/
    197                 if(nulLen>=(count)){
    198                     break;
    199                 }
    200             }
    201         }
    202         count = (int32_t)(pIntTarget-intTarget);
    203 
    204         if(0 < count && count <= destCapacity){
    205             uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
    206         }
    207 
    208         if(pDestLength){
    209             *pDestLength = count;
    210         }
    211 
    212         /* free the allocated memory */
    213         uprv_free(intTarget);
    214 
    215     }else{
    216         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    217     }
    218 cleanup:
    219     /* are we still using stack buffer */
    220     if(stackBuffer != saveBuf){
    221         uprv_free(saveBuf);
    222     }
    223     u_terminateWChars(dest,destCapacity,count,pErrorCode);
    224 
    225     u_releaseDefaultConverter(conv);
    226 
    227     return dest;
    228 }
    229 #endif
    230 
    231 U_CAPI wchar_t* U_EXPORT2
    232 u_strToWCS(wchar_t *dest,
    233            int32_t destCapacity,
    234            int32_t *pDestLength,
    235            const UChar *src,
    236            int32_t srcLength,
    237            UErrorCode *pErrorCode){
    238 
    239     /* args check */
    240     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    241         return NULL;
    242     }
    243 
    244     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    245         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    246     ) {
    247         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    248         return NULL;
    249     }
    250 
    251 #ifdef U_WCHAR_IS_UTF16
    252     /* wchar_t is UTF-16 just do a memcpy */
    253     if(srcLength == -1){
    254         srcLength = u_strlen(src);
    255     }
    256     if(0 < srcLength && srcLength <= destCapacity){
    257         u_memcpy(dest, src, srcLength);
    258     }
    259     if(pDestLength){
    260        *pDestLength = srcLength;
    261     }
    262 
    263     u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
    264 
    265     return dest;
    266 
    267 #elif defined U_WCHAR_IS_UTF32
    268 
    269     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
    270                                   src, srcLength, pErrorCode);
    271 
    272 #else
    273 
    274     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
    275 
    276 #endif
    277 
    278 }
    279 
    280 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    281 /* helper function */
    282 static UChar*
    283 _strFromWCS( UChar   *dest,
    284              int32_t destCapacity,
    285              int32_t *pDestLength,
    286              const wchar_t *src,
    287              int32_t srcLength,
    288              UErrorCode *pErrorCode)
    289 {
    290     int32_t retVal =0, count =0 ;
    291     UConverter* conv = NULL;
    292     UChar* pTarget = NULL;
    293     UChar* pTargetLimit = NULL;
    294     UChar* target = NULL;
    295 
    296     UChar uStack [_STACK_BUFFER_CAPACITY];
    297 
    298     wchar_t wStack[_STACK_BUFFER_CAPACITY];
    299     wchar_t* pWStack = wStack;
    300 
    301 
    302     char cStack[_STACK_BUFFER_CAPACITY];
    303     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
    304     char* pCSrc=cStack;
    305     char* pCSave=pCSrc;
    306     char* pCSrcLimit=NULL;
    307 
    308     const wchar_t* pSrc = src;
    309     const wchar_t* pSrcLimit = NULL;
    310 
    311     if(srcLength ==-1){
    312         /* if the wchar_t source is null terminated we can safely
    313          * assume that there are no embedded nulls, this is a fast
    314          * path for null terminated strings.
    315          */
    316         for(;;){
    317             /* convert wchars  to chars */
    318             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
    319 
    320             if(retVal == -1){
    321                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    322                 goto cleanup;
    323             }else if(retVal >= (cStackCap-1)){
    324                 /* Should rarely occur */
    325                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    326                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
    327                 pCSave = pCSrc;
    328             }else{
    329                 /* converted every thing */
    330                 pCSrc = pCSrc+retVal;
    331                 break;
    332             }
    333         }
    334 
    335     }else{
    336         /* here the source is not null terminated
    337          * so it may have nulls embeded and we need to
    338          * do some extra processing
    339          */
    340         int32_t remaining =cStackCap;
    341 
    342         pSrcLimit = src + srcLength;
    343 
    344         for(;;){
    345             register int32_t nulLen = 0;
    346 
    347             /* find nulls in the string */
    348             while(nulLen<srcLength && pSrc[nulLen++]!=0){
    349             }
    350 
    351             if((pSrc+nulLen) < pSrcLimit){
    352                 /* check if we have enough room in pCSrc */
    353                 if(remaining < (nulLen * MB_CUR_MAX)){
    354                     /* should rarely occur */
    355                     int32_t len = (pCSrc-pCSave);
    356                     pCSrc = pCSave;
    357                     /* we do not have enough room so grow the buffer*/
    358                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    359                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    360 
    361                     pCSave = pCSrc;
    362                     pCSrc = pCSave+len;
    363                     remaining = cStackCap-(pCSrc - pCSave);
    364                 }
    365 
    366                 /* we have found a null  so convert the
    367                  * chunk from begining of non-null char to null
    368                  */
    369                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
    370 
    371                 if(retVal==-1){
    372                     /* an error occurred bail out */
    373                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    374                     goto cleanup;
    375                 }
    376 
    377                 pCSrc += retVal+1 /* already null terminated */;
    378 
    379                 pSrc += nulLen; /* skip past the null */
    380                 srcLength-=nulLen; /* decrement the srcLength */
    381                 remaining -= (pCSrc-pCSave);
    382 
    383 
    384             }else{
    385                 /* the source is not null terminated and we are
    386                  * end of source so we copy the source to a temp buffer
    387                  * null terminate it and convert wchar_ts to chars
    388                  */
    389                 if(nulLen >= _STACK_BUFFER_CAPACITY){
    390                     /* Should rarely occcur */
    391                     /* allocate new buffer buffer */
    392                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
    393                     if(pWStack==NULL){
    394                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    395                         goto cleanup;
    396                     }
    397                 }
    398                 if(nulLen>0){
    399                     /* copy the contents to tempStack */
    400                     uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
    401                 }
    402 
    403                 /* null terminate the tempBuffer */
    404                 pWStack[nulLen] =0 ;
    405 
    406                 if(remaining < (nulLen * MB_CUR_MAX)){
    407                     /* Should rarely occur */
    408                     int32_t len = (pCSrc-pCSave);
    409                     pCSrc = pCSave;
    410                     /* we do not have enough room so grow the buffer*/
    411                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
    412                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
    413 
    414                     pCSave = pCSrc;
    415                     pCSrc = pCSave+len;
    416                     remaining = cStackCap-(pCSrc - pCSave);
    417                 }
    418                 /* convert to chars */
    419                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
    420 
    421                 pCSrc += retVal;
    422                 pSrc  += nulLen;
    423                 srcLength-=nulLen; /* decrement the srcLength */
    424                 break;
    425             }
    426         }
    427     }
    428 
    429     /* OK..now we have converted from wchar_ts to chars now
    430      * convert chars to UChars
    431      */
    432     pCSrcLimit = pCSrc;
    433     pCSrc = pCSave;
    434     pTarget = target= dest;
    435     pTargetLimit = dest + destCapacity;
    436 
    437     conv= u_getDefaultConverter(pErrorCode);
    438 
    439     if(U_FAILURE(*pErrorCode)|| conv==NULL){
    440         goto cleanup;
    441     }
    442 
    443     for(;;) {
    444 
    445         *pErrorCode = U_ZERO_ERROR;
    446 
    447         /* convert to stack buffer*/
    448         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
    449 
    450         /* increment count to number written to stack */
    451         count+= pTarget - target;
    452 
    453         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
    454             target = uStack;
    455             pTarget = uStack;
    456             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
    457         } else {
    458             break;
    459         }
    460 
    461     }
    462 
    463     if(pDestLength){
    464         *pDestLength =count;
    465     }
    466 
    467     u_terminateUChars(dest,destCapacity,count,pErrorCode);
    468 
    469 cleanup:
    470 
    471     if(cStack != pCSave){
    472         uprv_free(pCSave);
    473     }
    474 
    475     if(wStack != pWStack){
    476         uprv_free(pWStack);
    477     }
    478 
    479     u_releaseDefaultConverter(conv);
    480 
    481     return dest;
    482 }
    483 #endif
    484 
    485 U_CAPI UChar* U_EXPORT2
    486 u_strFromWCS(UChar   *dest,
    487              int32_t destCapacity,
    488              int32_t *pDestLength,
    489              const wchar_t *src,
    490              int32_t srcLength,
    491              UErrorCode *pErrorCode)
    492 {
    493 
    494     /* args check */
    495     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
    496         return NULL;
    497     }
    498 
    499     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    500         (destCapacity<0) || (dest == NULL && destCapacity > 0)
    501     ) {
    502         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    503         return NULL;
    504     }
    505 
    506 #ifdef U_WCHAR_IS_UTF16
    507     /* wchar_t is UTF-16 just do a memcpy */
    508     if(srcLength == -1){
    509         srcLength = u_strlen((const UChar *)src);
    510     }
    511     if(0 < srcLength && srcLength <= destCapacity){
    512         u_memcpy(dest, src, srcLength);
    513     }
    514     if(pDestLength){
    515        *pDestLength = srcLength;
    516     }
    517 
    518     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
    519 
    520     return dest;
    521 
    522 #elif defined U_WCHAR_IS_UTF32
    523 
    524     return u_strFromUTF32(dest, destCapacity, pDestLength,
    525                           (UChar32*)src, srcLength, pErrorCode);
    526 
    527 #else
    528 
    529     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
    530 
    531 #endif
    532 
    533 }
    534 
    535 #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
    536