1 // UTFConvert.cpp 2 3 #include "StdAfx.h" 4 5 #include "MyTypes.h" 6 #include "UTFConvert.h" 7 8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 9 10 bool CheckUTF8(const char *src) throw() 11 { 12 for (;;) 13 { 14 Byte c; 15 unsigned numAdds; 16 c = *src++; 17 if (c == 0) 18 return true; 19 20 if (c < 0x80) 21 continue; 22 if (c < 0xC0) 23 return false; 24 for (numAdds = 1; numAdds < 5; numAdds++) 25 if (c < kUtf8Limits[numAdds]) 26 break; 27 UInt32 value = (c - kUtf8Limits[numAdds - 1]); 28 29 do 30 { 31 Byte c2 = *src++; 32 if (c2 < 0x80 || c2 >= 0xC0) 33 return false; 34 value <<= 6; 35 value |= (c2 - 0x80); 36 } 37 while (--numAdds); 38 39 if (value >= 0x110000) 40 return false; 41 } 42 } 43 44 45 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw() 46 { 47 size_t destPos = 0, srcPos = 0; 48 for (;;) 49 { 50 Byte c; 51 unsigned numAdds; 52 if (srcPos == srcLen) 53 { 54 *destLen = destPos; 55 return True; 56 } 57 c = (Byte)src[srcPos++]; 58 59 if (c < 0x80) 60 { 61 if (dest) 62 dest[destPos] = (wchar_t)c; 63 destPos++; 64 continue; 65 } 66 if (c < 0xC0) 67 break; 68 for (numAdds = 1; numAdds < 5; numAdds++) 69 if (c < kUtf8Limits[numAdds]) 70 break; 71 UInt32 value = (c - kUtf8Limits[numAdds - 1]); 72 73 do 74 { 75 Byte c2; 76 if (srcPos == srcLen) 77 break; 78 c2 = (Byte)src[srcPos++]; 79 if (c2 < 0x80 || c2 >= 0xC0) 80 break; 81 value <<= 6; 82 value |= (c2 - 0x80); 83 } 84 while (--numAdds); 85 86 if (value < 0x10000) 87 { 88 if (dest) 89 dest[destPos] = (wchar_t)value; 90 destPos++; 91 } 92 else 93 { 94 value -= 0x10000; 95 if (value >= 0x100000) 96 break; 97 if (dest) 98 { 99 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10)); 100 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF)); 101 } 102 destPos += 2; 103 } 104 } 105 *destLen = destPos; 106 return False; 107 } 108 109 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen) 110 { 111 size_t destPos = 0, srcPos = 0; 112 for (;;) 113 { 114 unsigned numAdds; 115 UInt32 value; 116 if (srcPos == srcLen) 117 { 118 *destLen = destPos; 119 return True; 120 } 121 value = src[srcPos++]; 122 if (value < 0x80) 123 { 124 if (dest) 125 dest[destPos] = (char)value; 126 destPos++; 127 continue; 128 } 129 if (value >= 0xD800 && value < 0xE000) 130 { 131 UInt32 c2; 132 if (value >= 0xDC00 || srcPos == srcLen) 133 break; 134 c2 = src[srcPos++]; 135 if (c2 < 0xDC00 || c2 >= 0xE000) 136 break; 137 value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; 138 } 139 for (numAdds = 1; numAdds < 5; numAdds++) 140 if (value < (((UInt32)1) << (numAdds * 5 + 6))) 141 break; 142 if (dest) 143 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); 144 destPos++; 145 do 146 { 147 numAdds--; 148 if (dest) 149 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); 150 destPos++; 151 } 152 while (numAdds != 0); 153 } 154 *destLen = destPos; 155 return False; 156 } 157 158 bool ConvertUTF8ToUnicode(const AString &src, UString &dest) 159 { 160 dest.Empty(); 161 size_t destLen = 0; 162 Utf8_To_Utf16(NULL, &destLen, src, src.Len()); 163 Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len()); 164 dest.ReleaseBuffer((unsigned)destLen); 165 return res ? true : false; 166 } 167 168 bool ConvertUnicodeToUTF8(const UString &src, AString &dest) 169 { 170 dest.Empty(); 171 size_t destLen = 0; 172 Utf16_To_Utf8(NULL, &destLen, src, src.Len()); 173 Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len()); 174 dest.ReleaseBuffer((unsigned)destLen); 175 return res ? true : false; 176 } 177