Home | History | Annotate | Download | only in Common
      1 // UTFConvert.cpp
      2 
      3 #include "StdAfx.h"
      4 
      5 #include "MyTypes.h"
      6 #include "UTFConvert.h"
      7 
      8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
      9 
     10 bool CheckUTF8(const char *src) throw()
     11 {
     12   for (;;)
     13   {
     14     Byte c;
     15     unsigned numAdds;
     16     c = *src++;
     17     if (c == 0)
     18       return true;
     19 
     20     if (c < 0x80)
     21       continue;
     22     if (c < 0xC0)
     23       return false;
     24     for (numAdds = 1; numAdds < 5; numAdds++)
     25       if (c < kUtf8Limits[numAdds])
     26         break;
     27     UInt32 value = (c - kUtf8Limits[numAdds - 1]);
     28 
     29     do
     30     {
     31       Byte c2 = *src++;
     32       if (c2 < 0x80 || c2 >= 0xC0)
     33         return false;
     34       value <<= 6;
     35       value |= (c2 - 0x80);
     36     }
     37     while (--numAdds);
     38 
     39     if (value >= 0x110000)
     40       return false;
     41   }
     42 }
     43 
     44 
     45 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw()
     46 {
     47   size_t destPos = 0, srcPos = 0;
     48   for (;;)
     49   {
     50     Byte c;
     51     unsigned numAdds;
     52     if (srcPos == srcLen)
     53     {
     54       *destLen = destPos;
     55       return True;
     56     }
     57     c = (Byte)src[srcPos++];
     58 
     59     if (c < 0x80)
     60     {
     61       if (dest)
     62         dest[destPos] = (wchar_t)c;
     63       destPos++;
     64       continue;
     65     }
     66     if (c < 0xC0)
     67       break;
     68     for (numAdds = 1; numAdds < 5; numAdds++)
     69       if (c < kUtf8Limits[numAdds])
     70         break;
     71     UInt32 value = (c - kUtf8Limits[numAdds - 1]);
     72 
     73     do
     74     {
     75       Byte c2;
     76       if (srcPos == srcLen)
     77         break;
     78       c2 = (Byte)src[srcPos++];
     79       if (c2 < 0x80 || c2 >= 0xC0)
     80         break;
     81       value <<= 6;
     82       value |= (c2 - 0x80);
     83     }
     84     while (--numAdds);
     85 
     86     if (value < 0x10000)
     87     {
     88       if (dest)
     89         dest[destPos] = (wchar_t)value;
     90       destPos++;
     91     }
     92     else
     93     {
     94       value -= 0x10000;
     95       if (value >= 0x100000)
     96         break;
     97       if (dest)
     98       {
     99         dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
    100         dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
    101       }
    102       destPos += 2;
    103     }
    104   }
    105   *destLen = destPos;
    106   return False;
    107 }
    108 
    109 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
    110 {
    111   size_t destPos = 0, srcPos = 0;
    112   for (;;)
    113   {
    114     unsigned numAdds;
    115     UInt32 value;
    116     if (srcPos == srcLen)
    117     {
    118       *destLen = destPos;
    119       return True;
    120     }
    121     value = src[srcPos++];
    122     if (value < 0x80)
    123     {
    124       if (dest)
    125         dest[destPos] = (char)value;
    126       destPos++;
    127       continue;
    128     }
    129     if (value >= 0xD800 && value < 0xE000)
    130     {
    131       UInt32 c2;
    132       if (value >= 0xDC00 || srcPos == srcLen)
    133         break;
    134       c2 = src[srcPos++];
    135       if (c2 < 0xDC00 || c2 >= 0xE000)
    136         break;
    137       value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
    138     }
    139     for (numAdds = 1; numAdds < 5; numAdds++)
    140       if (value < (((UInt32)1) << (numAdds * 5 + 6)))
    141         break;
    142     if (dest)
    143       dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
    144     destPos++;
    145     do
    146     {
    147       numAdds--;
    148       if (dest)
    149         dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
    150       destPos++;
    151     }
    152     while (numAdds != 0);
    153   }
    154   *destLen = destPos;
    155   return False;
    156 }
    157 
    158 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
    159 {
    160   dest.Empty();
    161   size_t destLen = 0;
    162   Utf8_To_Utf16(NULL, &destLen, src, src.Len());
    163   Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
    164   dest.ReleaseBuffer((unsigned)destLen);
    165   return res ? true : false;
    166 }
    167 
    168 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
    169 {
    170   dest.Empty();
    171   size_t destLen = 0;
    172   Utf16_To_Utf8(NULL, &destLen, src, src.Len());
    173   Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
    174   dest.ReleaseBuffer((unsigned)destLen);
    175   return res ? true : false;
    176 }
    177