1 // UTFConvert.cpp 2 3 #include "StdAfx.h" 4 5 #include "UTFConvert.h" 6 #include "Types.h" 7 8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 9 10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) 11 { 12 size_t destPos = 0, srcPos = 0; 13 for (;;) 14 { 15 Byte c; 16 int numAdds; 17 if (srcPos == srcLen) 18 { 19 *destLen = destPos; 20 return True; 21 } 22 c = (Byte)src[srcPos++]; 23 24 if (c < 0x80) 25 { 26 if (dest) 27 dest[destPos] = (wchar_t)c; 28 destPos++; 29 continue; 30 } 31 if (c < 0xC0) 32 break; 33 for (numAdds = 1; numAdds < 5; numAdds++) 34 if (c < kUtf8Limits[numAdds]) 35 break; 36 UInt32 value = (c - kUtf8Limits[numAdds - 1]); 37 38 do 39 { 40 Byte c2; 41 if (srcPos == srcLen) 42 break; 43 c2 = (Byte)src[srcPos++]; 44 if (c2 < 0x80 || c2 >= 0xC0) 45 break; 46 value <<= 6; 47 value |= (c2 - 0x80); 48 } 49 while (--numAdds != 0); 50 51 if (value < 0x10000) 52 { 53 if (dest) 54 dest[destPos] = (wchar_t)value; 55 destPos++; 56 } 57 else 58 { 59 value -= 0x10000; 60 if (value >= 0x100000) 61 break; 62 if (dest) 63 { 64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); 65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); 66 } 67 destPos += 2; 68 } 69 } 70 *destLen = destPos; 71 return False; 72 } 73 74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) 75 { 76 size_t destPos = 0, srcPos = 0; 77 for (;;) 78 { 79 unsigned numAdds; 80 UInt32 value; 81 if (srcPos == srcLen) 82 { 83 *destLen = destPos; 84 return True; 85 } 86 value = src[srcPos++]; 87 if (value < 0x80) 88 { 89 if (dest) 90 dest[destPos] = (char)value; 91 destPos++; 92 continue; 93 } 94 if (value >= 0xD800 && value < 0xE000) 95 { 96 UInt32 c2; 97 if (value >= 0xDC00 || srcPos == srcLen) 98 break; 99 c2 = src[srcPos++]; 100 if (c2 < 0xDC00 || c2 >= 0xE000) 101 break; 102 value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; 103 } 104 for (numAdds = 1; numAdds < 5; numAdds++) 105 if (value < (((UInt32)1) << (numAdds * 5 + 6))) 106 break; 107 if (dest) 108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); 109 destPos++; 110 do 111 { 112 numAdds--; 113 if (dest) 114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); 115 destPos++; 116 } 117 while (numAdds != 0); 118 } 119 *destLen = destPos; 120 return False; 121 } 122 123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest) 124 { 125 dest.Empty(); 126 size_t destLen = 0; 127 Utf8_To_Utf16(NULL, &destLen, src, src.Length()); 128 wchar_t *p = dest.GetBuffer((int)destLen); 129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length()); 130 p[destLen] = 0; 131 dest.ReleaseBuffer(); 132 return res ? true : false; 133 } 134 135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest) 136 { 137 dest.Empty(); 138 size_t destLen = 0; 139 Utf16_To_Utf8(NULL, &destLen, src, src.Length()); 140 char *p = dest.GetBuffer((int)destLen); 141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length()); 142 p[destLen] = 0; 143 dest.ReleaseBuffer(); 144 return res ? true : false; 145 } 146