1 // Common/StringConvert.cpp 2 3 #include "StdAfx.h" 4 5 #include "StringConvert.h" 6 7 #ifndef _WIN32 8 #include <stdlib.h> 9 #endif 10 11 static const char k_DefultChar = '_'; 12 13 #ifdef _WIN32 14 15 /* 16 MultiByteToWideChar(CodePage, DWORD dwFlags, 17 LPCSTR lpMultiByteStr, int cbMultiByte, 18 LPWSTR lpWideCharStr, int cchWideChar) 19 20 if (cbMultiByte == 0) 21 return: 0. ERR: ERROR_INVALID_PARAMETER 22 23 if (cchWideChar == 0) 24 return: the required buffer size in characters. 25 26 if (supplied buffer size was not large enough) 27 return: 0. ERR: ERROR_INSUFFICIENT_BUFFER 28 The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex) 29 30 If there are illegal characters: 31 if MB_ERR_INVALID_CHARS is set in dwFlags: 32 - the function stops conversion on illegal character. 33 - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION. 34 35 if MB_ERR_INVALID_CHARS is NOT set in dwFlags: 36 before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0. 37 in Vista+: illegal character is not dropped (MSDN). Undocumented: illegal 38 character is converted to U+FFFD, which is REPLACEMENT CHARACTER. 39 */ 40 41 42 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage) 43 { 44 dest.Empty(); 45 if (src.IsEmpty()) 46 return; 47 { 48 /* 49 wchar_t *d = dest.GetBuf(src.Len()); 50 const char *s = (const char *)src; 51 unsigned i; 52 53 for (i = 0;;) 54 { 55 Byte c = (Byte)s[i]; 56 if (c >= 0x80 || c == 0) 57 break; 58 d[i++] = (wchar_t)c; 59 } 60 61 if (i != src.Len()) 62 { 63 unsigned len = MultiByteToWideChar(codePage, 0, s + i, 64 src.Len() - i, d + i, 65 src.Len() + 1 - i); 66 if (len == 0) 67 throw 282228; 68 i += len; 69 } 70 71 d[i] = 0; 72 dest.ReleaseBuf_SetLen(i); 73 */ 74 unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0); 75 if (len == 0) 76 { 77 if (GetLastError() != 0) 78 throw 282228; 79 } 80 else 81 { 82 len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len); 83 if (len == 0) 84 throw 282228; 85 dest.ReleaseBuf_SetEnd(len); 86 } 87 } 88 } 89 90 /* 91 int WideCharToMultiByte( 92 UINT CodePage, DWORD dwFlags, 93 LPCWSTR lpWideCharStr, int cchWideChar, 94 LPSTR lpMultiByteStr, int cbMultiByte, 95 LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar); 96 97 if (lpDefaultChar == NULL), 98 - it uses system default value. 99 100 if (CodePage == CP_UTF7 || CodePage == CP_UTF8) 101 if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL) 102 return: 0. ERR: ERROR_INVALID_PARAMETER. 103 104 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL) 105 106 */ 107 108 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed) 109 { 110 dest.Empty(); 111 defaultCharWasUsed = false; 112 if (src.IsEmpty()) 113 return; 114 { 115 /* 116 unsigned numRequiredBytes = src.Len() * 2; 117 char *d = dest.GetBuf(numRequiredBytes); 118 const wchar_t *s = (const wchar_t *)src; 119 unsigned i; 120 121 for (i = 0;;) 122 { 123 wchar_t c = s[i]; 124 if (c >= 0x80 || c == 0) 125 break; 126 d[i++] = (char)c; 127 } 128 129 if (i != src.Len()) 130 { 131 BOOL defUsed = FALSE; 132 defaultChar = defaultChar; 133 134 bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7); 135 unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i, 136 d + i, numRequiredBytes + 1 - i, 137 (isUtf ? NULL : &defaultChar), 138 (isUtf ? NULL : &defUsed)); 139 defaultCharWasUsed = (defUsed != FALSE); 140 if (len == 0) 141 throw 282229; 142 i += len; 143 } 144 145 d[i] = 0; 146 dest.ReleaseBuf_SetLen(i); 147 */ 148 149 /* 150 if (codePage != CP_UTF7) 151 { 152 const wchar_t *s = (const wchar_t *)src; 153 unsigned i; 154 for (i = 0;; i++) 155 { 156 wchar_t c = s[i]; 157 if (c >= 0x80 || c == 0) 158 break; 159 } 160 161 if (s[i] == 0) 162 { 163 char *d = dest.GetBuf(src.Len()); 164 for (i = 0;;) 165 { 166 wchar_t c = s[i]; 167 if (c == 0) 168 break; 169 d[i++] = (char)c; 170 } 171 d[i] = 0; 172 dest.ReleaseBuf_SetLen(i); 173 return; 174 } 175 } 176 */ 177 178 unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL); 179 if (len == 0) 180 { 181 if (GetLastError() != 0) 182 throw 282228; 183 } 184 else 185 { 186 BOOL defUsed = FALSE; 187 bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7); 188 // defaultChar = defaultChar; 189 len = WideCharToMultiByte(codePage, 0, src, src.Len(), 190 dest.GetBuf(len), len, 191 (isUtf ? NULL : &defaultChar), 192 (isUtf ? NULL : &defUsed) 193 ); 194 if (!isUtf) 195 defaultCharWasUsed = (defUsed != FALSE); 196 if (len == 0) 197 throw 282228; 198 dest.ReleaseBuf_SetEnd(len); 199 } 200 } 201 } 202 203 /* 204 #ifndef UNDER_CE 205 AString SystemStringToOemString(const CSysString &src) 206 { 207 AString dest; 208 const unsigned len = src.Len() * 2; 209 CharToOem(src, dest.GetBuf(len)); 210 dest.ReleaseBuf_CalcLen(len); 211 return dest; 212 } 213 #endif 214 */ 215 216 #else 217 218 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */) 219 { 220 dest.Empty(); 221 if (src.IsEmpty()) 222 return; 223 224 size_t limit = ((size_t)src.Len() + 1) * 2; 225 wchar_t *d = dest.GetBuf((unsigned)limit); 226 size_t len = mbstowcs(d, src, limit); 227 if (len != (size_t)-1) 228 { 229 dest.ReleaseBuf_SetEnd((unsigned)len); 230 return; 231 } 232 233 { 234 unsigned i; 235 const char *s = (const char *)src; 236 for (i = 0;;) 237 { 238 Byte c = (Byte)s[i]; 239 if (c == 0) 240 break; 241 d[i++] = (wchar_t)c; 242 } 243 d[i] = 0; 244 dest.ReleaseBuf_SetLen(i); 245 } 246 } 247 248 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed) 249 { 250 dest.Empty(); 251 defaultCharWasUsed = false; 252 if (src.IsEmpty()) 253 return; 254 255 size_t limit = ((size_t)src.Len() + 1) * 6; 256 char *d = dest.GetBuf((unsigned)limit); 257 size_t len = wcstombs(d, src, limit); 258 if (len != (size_t)-1) 259 { 260 dest.ReleaseBuf_SetEnd((unsigned)len); 261 return; 262 } 263 264 { 265 const wchar_t *s = (const wchar_t *)src; 266 unsigned i; 267 for (i = 0;;) 268 { 269 wchar_t c = s[i]; 270 if (c == 0) 271 break; 272 if (c >= 0x100) 273 { 274 c = defaultChar; 275 defaultCharWasUsed = true; 276 } 277 d[i++] = (char)c; 278 } 279 d[i] = 0; 280 dest.ReleaseBuf_SetLen(i); 281 } 282 } 283 284 #endif 285 286 287 UString MultiByteToUnicodeString(const AString &src, UINT codePage) 288 { 289 UString dest; 290 MultiByteToUnicodeString2(dest, src, codePage); 291 return dest; 292 } 293 294 UString MultiByteToUnicodeString(const char *src, UINT codePage) 295 { 296 return MultiByteToUnicodeString(AString(src), codePage); 297 } 298 299 300 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage) 301 { 302 bool defaultCharWasUsed; 303 UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed); 304 } 305 306 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed) 307 { 308 AString dest; 309 UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed); 310 return dest; 311 } 312 313 AString UnicodeStringToMultiByte(const UString &src, UINT codePage) 314 { 315 AString dest; 316 bool defaultCharWasUsed; 317 UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed); 318 return dest; 319 } 320