Home | History | Annotate | Download | only in Common
      1 // Common/StringConvert.cpp
      2 
      3 #include "StdAfx.h"
      4 
      5 #include "StringConvert.h"
      6 
      7 #ifndef _WIN32
      8 #include <stdlib.h>
      9 #endif
     10 
// Replacement character handed to the conversion routine as `defaultChar` by
// the convenience overloads in this file that do not take one from the caller.
// (The identifier's spelling is kept as-is because other code in this file
// refers to it.)
static const char k_DefultChar = '_';
     12 
     13 #ifdef _WIN32
     14 
     15 /*
     16 MultiByteToWideChar(CodePage, DWORD dwFlags,
     17     LPCSTR lpMultiByteStr, int cbMultiByte,
     18     LPWSTR lpWideCharStr, int cchWideChar)
     19 
     20   if (cbMultiByte == 0)
     21     return: 0. ERR: ERROR_INVALID_PARAMETER
     22 
     23   if (cchWideChar == 0)
     24     return: the required buffer size in characters.
     25 
     26   if (supplied buffer size was not large enough)
     27     return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
     28     The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
     29 
     30   If there are illegal characters:
     31     if MB_ERR_INVALID_CHARS is set in dwFlags:
     32       - the function stops conversion on illegal character.
     33       - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
     34 
     35     if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
     36       before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
     37       in Vista+:    illegal character is not dropped (MSDN). Undocumented: illegal
     38                     character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
     39 */
     40 
     41 
     42 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
     43 {
     44   dest.Empty();
     45   if (src.IsEmpty())
     46     return;
     47   {
     48     /*
     49     wchar_t *d = dest.GetBuf(src.Len());
     50     const char *s = (const char *)src;
     51     unsigned i;
     52 
     53     for (i = 0;;)
     54     {
     55       Byte c = (Byte)s[i];
     56       if (c >= 0x80 || c == 0)
     57         break;
     58       d[i++] = (wchar_t)c;
     59     }
     60 
     61     if (i != src.Len())
     62     {
     63       unsigned len = MultiByteToWideChar(codePage, 0, s + i,
     64           src.Len() - i, d + i,
     65           src.Len() + 1 - i);
     66       if (len == 0)
     67         throw 282228;
     68       i += len;
     69     }
     70 
     71     d[i] = 0;
     72     dest.ReleaseBuf_SetLen(i);
     73     */
     74     unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
     75     if (len == 0)
     76     {
     77       if (GetLastError() != 0)
     78         throw 282228;
     79     }
     80     else
     81     {
     82       len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
     83       if (len == 0)
     84         throw 282228;
     85       dest.ReleaseBuf_SetEnd(len);
     86     }
     87   }
     88 }
     89 
     90 /*
     91   int WideCharToMultiByte(
     92       UINT CodePage, DWORD dwFlags,
     93       LPCWSTR lpWideCharStr, int cchWideChar,
     94       LPSTR lpMultiByteStr, int cbMultiByte,
     95       LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
     96 
     97 if (lpDefaultChar == NULL),
     98   - it uses system default value.
     99 
    100 if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
    101   if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
    102     return: 0. ERR: ERROR_INVALID_PARAMETER.
    103 
    104 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
    105 
    106 */
    107 
    108 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
    109 {
    110   dest.Empty();
    111   defaultCharWasUsed = false;
    112   if (src.IsEmpty())
    113     return;
    114   {
    115     /*
    116     unsigned numRequiredBytes = src.Len() * 2;
    117     char *d = dest.GetBuf(numRequiredBytes);
    118     const wchar_t *s = (const wchar_t *)src;
    119     unsigned i;
    120 
    121     for (i = 0;;)
    122     {
    123       wchar_t c = s[i];
    124       if (c >= 0x80 || c == 0)
    125         break;
    126       d[i++] = (char)c;
    127     }
    128 
    129     if (i != src.Len())
    130     {
    131       BOOL defUsed = FALSE;
    132       defaultChar = defaultChar;
    133 
    134       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
    135       unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
    136           d + i, numRequiredBytes + 1 - i,
    137           (isUtf ? NULL : &defaultChar),
    138           (isUtf ? NULL : &defUsed));
    139       defaultCharWasUsed = (defUsed != FALSE);
    140       if (len == 0)
    141         throw 282229;
    142       i += len;
    143     }
    144 
    145     d[i] = 0;
    146     dest.ReleaseBuf_SetLen(i);
    147     */
    148 
    149     /*
    150     if (codePage != CP_UTF7)
    151     {
    152       const wchar_t *s = (const wchar_t *)src;
    153       unsigned i;
    154       for (i = 0;; i++)
    155       {
    156         wchar_t c = s[i];
    157         if (c >= 0x80 || c == 0)
    158           break;
    159       }
    160 
    161       if (s[i] == 0)
    162       {
    163         char *d = dest.GetBuf(src.Len());
    164         for (i = 0;;)
    165         {
    166           wchar_t c = s[i];
    167           if (c == 0)
    168             break;
    169           d[i++] = (char)c;
    170         }
    171         d[i] = 0;
    172         dest.ReleaseBuf_SetLen(i);
    173         return;
    174       }
    175     }
    176     */
    177 
    178     unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
    179     if (len == 0)
    180     {
    181       if (GetLastError() != 0)
    182         throw 282228;
    183     }
    184     else
    185     {
    186       BOOL defUsed = FALSE;
    187       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
    188       // defaultChar = defaultChar;
    189       len = WideCharToMultiByte(codePage, 0, src, src.Len(),
    190           dest.GetBuf(len), len,
    191           (isUtf ? NULL : &defaultChar),
    192           (isUtf ? NULL : &defUsed)
    193           );
    194       if (!isUtf)
    195         defaultCharWasUsed = (defUsed != FALSE);
    196       if (len == 0)
    197         throw 282228;
    198       dest.ReleaseBuf_SetEnd(len);
    199     }
    200   }
    201 }
    202 
    203 /*
    204 #ifndef UNDER_CE
    205 AString SystemStringToOemString(const CSysString &src)
    206 {
    207   AString dest;
    208   const unsigned len = src.Len() * 2;
    209   CharToOem(src, dest.GetBuf(len));
    210   dest.ReleaseBuf_CalcLen(len);
    211   return dest;
    212 }
    213 #endif
    214 */
    215 
    216 #else
    217 
    218 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
    219 {
    220   dest.Empty();
    221   if (src.IsEmpty())
    222     return;
    223 
    224   size_t limit = ((size_t)src.Len() + 1) * 2;
    225   wchar_t *d = dest.GetBuf((unsigned)limit);
    226   size_t len = mbstowcs(d, src, limit);
    227   if (len != (size_t)-1)
    228   {
    229     dest.ReleaseBuf_SetEnd((unsigned)len);
    230     return;
    231   }
    232 
    233   {
    234     unsigned i;
    235     const char *s = (const char *)src;
    236     for (i = 0;;)
    237     {
    238       Byte c = (Byte)s[i];
    239       if (c == 0)
    240         break;
    241       d[i++] = (wchar_t)c;
    242     }
    243     d[i] = 0;
    244     dest.ReleaseBuf_SetLen(i);
    245   }
    246 }
    247 
    248 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
    249 {
    250   dest.Empty();
    251   defaultCharWasUsed = false;
    252   if (src.IsEmpty())
    253     return;
    254 
    255   size_t limit = ((size_t)src.Len() + 1) * 6;
    256   char *d = dest.GetBuf((unsigned)limit);
    257   size_t len = wcstombs(d, src, limit);
    258   if (len != (size_t)-1)
    259   {
    260     dest.ReleaseBuf_SetEnd((unsigned)len);
    261     return;
    262   }
    263 
    264   {
    265     const wchar_t *s = (const wchar_t *)src;
    266     unsigned i;
    267     for (i = 0;;)
    268     {
    269       wchar_t c = s[i];
    270       if (c == 0)
    271         break;
    272       if (c >= 0x100)
    273       {
    274         c = defaultChar;
    275         defaultCharWasUsed = true;
    276       }
    277       d[i++] = (char)c;
    278     }
    279     d[i] = 0;
    280     dest.ReleaseBuf_SetLen(i);
    281   }
    282 }
    283 
    284 #endif
    285 
    286 
    287 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
    288 {
    289   UString dest;
    290   MultiByteToUnicodeString2(dest, src, codePage);
    291   return dest;
    292 }
    293 
    294 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
    295 {
    296   bool defaultCharWasUsed;
    297   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
    298 }
    299 
    300 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
    301 {
    302   AString dest;
    303   UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
    304   return dest;
    305 }
    306 
    307 AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
    308 {
    309   AString dest;
    310   bool defaultCharWasUsed;
    311   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
    312   return dest;
    313 }
    314