Home | History | Annotate | Download | only in Common
      1 // UTFConvert.cpp
      2 
      3 #include "StdAfx.h"
      4 
      5 #include "UTFConvert.h"
      6 #include "Types.h"
      7 
// Lead-byte thresholds for multi-byte UTF-8 sequences. Entry i is the lowest
// lead byte of a sequence that carries i + 1 continuation bytes.
// Utf8_To_Utf16 scans this table to count continuation bytes and subtracts the
// matching entry to strip the prefix bits; Utf16_To_Utf8 adds the matching
// entry to the top payload bits to build the lead byte.
static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
      9 
     10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
     11 {
     12   size_t destPos = 0, srcPos = 0;
     13   for (;;)
     14   {
     15     Byte c;
     16     int numAdds;
     17     if (srcPos == srcLen)
     18     {
     19       *destLen = destPos;
     20       return True;
     21     }
     22     c = (Byte)src[srcPos++];
     23 
     24     if (c < 0x80)
     25     {
     26       if (dest)
     27         dest[destPos] = (wchar_t)c;
     28       destPos++;
     29       continue;
     30     }
     31     if (c < 0xC0)
     32       break;
     33     for (numAdds = 1; numAdds < 5; numAdds++)
     34       if (c < kUtf8Limits[numAdds])
     35         break;
     36     UInt32 value = (c - kUtf8Limits[numAdds - 1]);
     37 
     38     do
     39     {
     40       Byte c2;
     41       if (srcPos == srcLen)
     42         break;
     43       c2 = (Byte)src[srcPos++];
     44       if (c2 < 0x80 || c2 >= 0xC0)
     45         break;
     46       value <<= 6;
     47       value |= (c2 - 0x80);
     48     }
     49     while (--numAdds != 0);
     50 
     51     if (value < 0x10000)
     52     {
     53       if (dest)
     54         dest[destPos] = (wchar_t)value;
     55       destPos++;
     56     }
     57     else
     58     {
     59       value -= 0x10000;
     60       if (value >= 0x100000)
     61         break;
     62       if (dest)
     63       {
     64         dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
     65         dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
     66       }
     67       destPos += 2;
     68     }
     69   }
     70   *destLen = destPos;
     71   return False;
     72 }
     73 
     74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
     75 {
     76   size_t destPos = 0, srcPos = 0;
     77   for (;;)
     78   {
     79     unsigned numAdds;
     80     UInt32 value;
     81     if (srcPos == srcLen)
     82     {
     83       *destLen = destPos;
     84       return True;
     85     }
     86     value = src[srcPos++];
     87     if (value < 0x80)
     88     {
     89       if (dest)
     90         dest[destPos] = (char)value;
     91       destPos++;
     92       continue;
     93     }
     94     if (value >= 0xD800 && value < 0xE000)
     95     {
     96       UInt32 c2;
     97       if (value >= 0xDC00 || srcPos == srcLen)
     98         break;
     99       c2 = src[srcPos++];
    100       if (c2 < 0xDC00 || c2 >= 0xE000)
    101         break;
    102       value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
    103     }
    104     for (numAdds = 1; numAdds < 5; numAdds++)
    105       if (value < (((UInt32)1) << (numAdds * 5 + 6)))
    106         break;
    107     if (dest)
    108       dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
    109     destPos++;
    110     do
    111     {
    112       numAdds--;
    113       if (dest)
    114         dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
    115       destPos++;
    116     }
    117     while (numAdds != 0);
    118   }
    119   *destLen = destPos;
    120   return False;
    121 }
    122 
    123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
    124 {
    125   dest.Empty();
    126   size_t destLen = 0;
    127   Utf8_To_Utf16(NULL, &destLen, src, src.Length());
    128   wchar_t *p = dest.GetBuffer((int)destLen);
    129   Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
    130   p[destLen] = 0;
    131   dest.ReleaseBuffer();
    132   return res ? true : false;
    133 }
    134 
    135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
    136 {
    137   dest.Empty();
    138   size_t destLen = 0;
    139   Utf16_To_Utf8(NULL, &destLen, src, src.Length());
    140   char *p = dest.GetBuffer((int)destLen);
    141   Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
    142   p[destLen] = 0;
    143   dest.ReleaseBuffer();
    144   return res ? true : false;
    145 }
    146