Home | History | Annotate | Download | only in wince
      1 /*
      2  * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
      3  * Copyright (C) 2010-2011 Patrick Gansterer <paroga (at) paroga.com>
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  *  This library is distributed in the hope that it will be useful,
     15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17  *  Library General Public License for more details.
     18  *
     19  *  You should have received a copy of the GNU Library General Public License
     20  *  along with this library; see the file COPYING.LIB.  If not, write to
     21  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     22  *  Boston, MA 02110-1301, USA.
     23  */
     24 
     25 #include "config.h"
     26 #include "TextCodecWinCE.h"
     27 
     28 #include "FontCache.h"
     29 #include "PlatformString.h"
     30 #include <mlang.h>
     31 #include <winbase.h>
     32 #include <winnls.h>
     33 #include <wtf/HashMap.h>
     34 #include <wtf/HashSet.h>
     35 #include <wtf/text/CString.h>
     36 #include <wtf/text/StringConcatenate.h>
     37 #include <wtf/text/StringHash.h>
     38 
     39 namespace WebCore {
     40 
     41 struct CharsetInfo {
     42     CString m_name;
     43     String m_friendlyName;
     44     UINT m_codePage;
     45     Vector<CString> m_aliases;
     46 };
     47 
     48 class LanguageManager {
     49 private:
     50     LanguageManager();
     51 
     52     friend LanguageManager& languageManager();
     53 };
     54 
     55 // Usage: a lookup table used to get CharsetInfo with code page ID.
     56 // Key: code page ID. Value: charset information.
     57 static HashMap<UINT, CString>& codePageCharsets()
     58 {
     59     static HashMap<UINT, CString> cc;
     60     return cc;
     61 }
     62 
     63 static HashMap<String, CharsetInfo>& knownCharsets()
     64 {
     65     static HashMap<String, CharsetInfo> kc;
     66     return kc;
     67 }
     68 
     69 // Usage: a map that stores charsets that are supported by system. Sorted by name.
     70 // Key: charset. Value: code page ID.
     71 typedef HashSet<String> CharsetSet;
     72 static CharsetSet& supportedCharsets()
     73 {
     74     static CharsetSet sl;
     75     return sl;
     76 }
     77 
     78 static LanguageManager& languageManager()
     79 {
     80     static LanguageManager lm;
     81     return lm;
     82 }
     83 
     84 LanguageManager::LanguageManager()
     85 {
     86     IEnumCodePage* enumInterface;
     87     IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
     88     if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
     89         MIMECPINFO cpInfo;
     90         ULONG ccpInfo;
     91         while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
     92             if (!IsValidCodePage(cpInfo.uiCodePage))
     93                 continue;
     94 
     95             HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
     96 
     97             CString name(String(cpInfo.wszWebCharset).latin1());
     98             if (i == codePageCharsets().end()) {
     99                 CharsetInfo info;
    100                 info.m_codePage = cpInfo.uiCodePage;
    101                 knownCharsets().set(name.data(), info);
    102                 i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
    103             }
    104             if (i != codePageCharsets().end()) {
    105                 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
    106                 ASSERT(j != knownCharsets().end());
    107                 CharsetInfo& info = j->second;
    108                 info.m_name = i->second.data();
    109                 info.m_friendlyName = cpInfo.wszDescription;
    110                 info.m_aliases.append(name);
    111                 info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
    112                 info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
    113                 String cpName = makeString("cp", String::number(cpInfo.uiCodePage));
    114                 info.m_aliases.append(cpName.latin1());
    115                 supportedCharsets().add(i->second.data());
    116             }
    117         }
    118         enumInterface->Release();
    119     }
    120 }
    121 
    122 static UINT getCodePage(const char* name)
    123 {
    124     // Explicitly use a "const" reference to fix the silly VS build error
    125     // saying "==" is not found for const_iterator and iterator
    126     const HashMap<String, CharsetInfo>& charsets = knownCharsets();
    127     HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
    128     return i == charsets.end() ? CP_ACP : i->second.m_codePage;
    129 }
    130 
    131 static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
    132 {
    133     return new TextCodecWinCE(getCodePage(encoding.name()));
    134 }
    135 
    136 TextCodecWinCE::TextCodecWinCE(UINT codePage)
    137     : m_codePage(codePage)
    138 {
    139 }
    140 
    141 TextCodecWinCE::~TextCodecWinCE()
    142 {
    143 }
    144 
    145 void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
    146 {
    147     languageManager();
    148     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
    149         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
    150         if (j != knownCharsets().end()) {
    151             registrar(j->second.m_name.data(), j->second.m_name.data());
    152             for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
    153                 registrar(alias->data(), j->second.m_name.data());
    154         }
    155     }
    156 }
    157 
    158 void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
    159 {
    160     languageManager();
    161     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
    162         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
    163         if (j != knownCharsets().end())
    164             registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
    165     }
    166 }
    167 
    168 static DWORD getCodePageFlags(UINT codePage)
    169 {
    170     if (codePage == 42) // Symbol
    171         return 0;
    172 
    173     // Microsoft says the flag must be 0 for the following code pages
    174     if (codePage > 50000) {
    175         if ((codePage >= 50220 && codePage <= 50222)
    176             || codePage == 50225
    177             || codePage == 50227
    178             || codePage == 50229
    179             || codePage == 52936
    180             || codePage == 54936
    181             || (codePage >= 57002 && codePage <= 57001)
    182             || codePage == 65000 // UTF-7
    183             )
    184             return 0;
    185     }
    186 
    187     return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
    188 }
    189 
    190 static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
    191 {
    192     for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) {
    193         if (*bytes & 0x80)
    194             break;
    195     }
    196     return bytes;
    197 }
    198 
    199 static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left)
    200 {
    201     *left = length;
    202     if (!bytes || !length)
    203         return;
    204 
    205     DWORD flags = getCodePageFlags(codePage);
    206 
    207     int testLength = length;
    208     int untestedLength = length;
    209     for (;;) {
    210         int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
    211 
    212         if (resultLength > 0) {
    213             int oldSize = result.size();
    214             result.resize(oldSize + resultLength);
    215 
    216             MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
    217 
    218             if (testLength == untestedLength) {
    219                 *left = length - testLength;
    220                 break;
    221             }
    222             untestedLength -= testLength;
    223             length -= testLength;
    224             bytes += testLength;
    225         } else {
    226             untestedLength = testLength - 1;
    227             if (!untestedLength) {
    228                 *left = length;
    229                 break;
    230             }
    231         }
    232         testLength = (untestedLength + 1) / 2;
    233     }
    234 }
    235 
    236 String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
    237 {
    238     if (!m_decodeBuffer.isEmpty()) {
    239         m_decodeBuffer.append(bytes, length);
    240         bytes = m_decodeBuffer.data();
    241         length = m_decodeBuffer.size();
    242     }
    243 
    244     size_t left;
    245     Vector<UChar, 8192> result;
    246     for (;;) {
    247         decodeInternal(result, m_codePage, bytes, length, &left);
    248         if (!left)
    249             break;
    250 
    251         if (!flush && left < 16)
    252             break;
    253 
    254         result.append(L'?');
    255         sawError = true;
    256         if (stopOnError)
    257             return String::adopt(result);
    258 
    259         if (left == 1)
    260             break;
    261 
    262         bytes += length - left + 1;
    263         length = left - 1;
    264     }
    265     if (left && !flush) {
    266         if (m_decodeBuffer.isEmpty())
    267             m_decodeBuffer.append(bytes + length - left, left);
    268         else {
    269             memmove(m_decodeBuffer.data(), bytes + length - left, left);
    270             m_decodeBuffer.resize(left);
    271         }
    272     } else
    273         m_decodeBuffer.clear();
    274 
    275     return String::adopt(result);
    276 }
    277 
    278 CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
    279 {
    280     if (!characters || !length)
    281         return CString();
    282 
    283     int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
    284 
    285     // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
    286 
    287     if (resultLength <= 0)
    288         return "?";
    289 
    290     char* characterBuffer;
    291     CString result = CString::newUninitialized(resultLength, characterBuffer);
    292 
    293     WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, characterBuffer, resultLength, 0, 0);
    294 
    295     return result;
    296 }
    297 
    298 void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
    299 {
    300     languageManager();
    301     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
    302         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
    303         if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
    304             break;
    305     }
    306 }
    307 
    308 } // namespace WebCore
    309