1 /* 2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. 3 * Copyright (C) 2007-2009 Torch Mobile, Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "TextEncodingRegistry.h" 29 30 #include "PlatformString.h" 31 #include "TextCodecLatin1.h" 32 #include "TextCodecUserDefined.h" 33 #include "TextCodecUTF16.h" 34 #include <wtf/ASCIICType.h> 35 #include <wtf/Assertions.h> 36 #include <wtf/HashFunctions.h> 37 #include <wtf/HashMap.h> 38 #include <wtf/StdLibExtras.h> 39 #include <wtf/StringExtras.h> 40 #include <wtf/Threading.h> 41 42 #if USE(ICU_UNICODE) 43 #include "TextCodecICU.h" 44 #endif 45 #if PLATFORM(MAC) 46 #include "TextCodecMac.h" 47 #endif 48 #if PLATFORM(QT) 49 #include "qt/TextCodecQt.h" 50 #endif 51 #if USE(GLIB_UNICODE) 52 #include "gtk/TextCodecGtk.h" 53 #endif 54 #if OS(WINCE) && !PLATFORM(QT) 55 #include "TextCodecWince.h" 56 #endif 57 58 using namespace WTF; 59 60 namespace WebCore { 61 62 const size_t maxEncodingNameLength = 63; 63 64 // Hash for all-ASCII strings that does case folding and skips any characters 65 // that are not alphanumeric. If passed any non-ASCII characters, depends on 66 // the behavior of isalnum -- if that returns false as it does on OS X, then 67 // it will properly skip those characters too. 68 struct TextEncodingNameHash { 69 70 static bool equal(const char* s1, const char* s2) 71 { 72 char c1; 73 char c2; 74 do { 75 do 76 c1 = *s1++; 77 while (c1 && !isASCIIAlphanumeric(c1)); 78 do 79 c2 = *s2++; 80 while (c2 && !isASCIIAlphanumeric(c2)); 81 if (toASCIILower(c1) != toASCIILower(c2)) 82 return false; 83 } while (c1 && c2); 84 return !c1 && !c2; 85 } 86 87 // This algorithm is the one-at-a-time hash from: 88 // http://burtleburtle.net/bob/hash/hashfaq.html 89 // http://burtleburtle.net/bob/hash/doobs.html 90 static unsigned hash(const char* s) 91 { 92 unsigned h = WTF::stringHashingStartValue; 93 for (;;) { 94 char c; 95 do { 96 c = *s++; 97 if (!c) { 98 h += (h << 3); 99 h ^= (h >> 11); 100 h += (h << 15); 101 return h; 102 } 103 } while (!isASCIIAlphanumeric(c)); 104 h += toASCIILower(c); 105 h += (h << 10); 106 h ^= (h >> 6); 107 } 108 } 109 110 static const bool safeToCompareToEmptyOrDeleted = false; 111 }; 112 113 struct TextCodecFactory { 114 NewTextCodecFunction function; 115 const void* additionalData; 116 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } 117 }; 118 119 typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; 120 typedef HashMap<const char*, TextCodecFactory> TextCodecMap; 121 122 static Mutex& encodingRegistryMutex() 123 { 124 // We don't have to use AtomicallyInitializedStatic here because 125 // this function is called on the main thread for any page before 126 // it is used in worker threads. 127 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 128 return mutex; 129 } 130 131 static TextEncodingNameMap* textEncodingNameMap; 132 static TextCodecMap* textCodecMap; 133 static bool didExtendTextCodecMaps; 134 135 static const char* const textEncodingNameBlacklist[] = { 136 "UTF-7" 137 }; 138 139 #if ERROR_DISABLED 140 141 static inline void checkExistingName(const char*, const char*) { } 142 143 #else 144 145 static void checkExistingName(const char* alias, const char* atomicName) 146 { 147 const char* oldAtomicName = textEncodingNameMap->get(alias); 148 if (!oldAtomicName) 149 return; 150 if (oldAtomicName == atomicName) 151 return; 152 // Keep the warning silent about one case where we know this will happen. 153 if (strcmp(alias, "ISO-8859-8-I") == 0 154 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 155 && strcasecmp(atomicName, "iso-8859-8") == 0) 156 return; 157 LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", 158 alias, oldAtomicName, atomicName); 159 } 160 161 #endif 162 163 static void addToTextEncodingNameMap(const char* alias, const char* name) 164 { 165 ASSERT(strlen(alias) <= maxEncodingNameLength); 166 const char* atomicName = textEncodingNameMap->get(name); 167 ASSERT(strcmp(alias, name) == 0 || atomicName); 168 if (!atomicName) 169 atomicName = name; 170 checkExistingName(alias, atomicName); 171 textEncodingNameMap->add(alias, atomicName); 172 } 173 174 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData) 175 { 176 const char* atomicName = textEncodingNameMap->get(name); 177 ASSERT(atomicName); 178 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData)); 179 } 180 181 static void pruneBlacklistedCodecs() 182 { 183 size_t blacklistedCodecListLength = sizeof(textEncodingNameBlacklist) / sizeof(textEncodingNameBlacklist[0]); 184 for (size_t i = 0; i < blacklistedCodecListLength; ++i) { 185 const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]); 186 if (!atomicName) 187 continue; 188 189 Vector<const char*> names; 190 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); 191 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); 192 for (; it != end; ++it) { 193 if (it->second == atomicName) 194 names.append(it->first); 195 } 196 197 size_t length = names.size(); 198 for (size_t j = 0; j < length; ++j) 199 textEncodingNameMap->remove(names[j]); 200 201 textCodecMap->remove(atomicName); 202 } 203 } 204 205 static void buildBaseTextCodecMaps() 206 { 207 ASSERT(isMainThread()); 208 ASSERT(!textCodecMap); 209 ASSERT(!textEncodingNameMap); 210 211 textCodecMap = new TextCodecMap; 212 textEncodingNameMap = new TextEncodingNameMap; 213 214 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); 215 TextCodecLatin1::registerCodecs(addToTextCodecMap); 216 217 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); 218 TextCodecUTF16::registerCodecs(addToTextCodecMap); 219 220 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap); 221 TextCodecUserDefined::registerCodecs(addToTextCodecMap); 222 223 #if USE(ICU_UNICODE) 224 TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); 225 TextCodecICU::registerBaseCodecs(addToTextCodecMap); 226 #endif 227 228 #if USE(GLIB_UNICODE) 229 TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap); 230 TextCodecGtk::registerBaseCodecs(addToTextCodecMap); 231 #endif 232 233 #if OS(WINCE) && !PLATFORM(QT) 234 TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap); 235 TextCodecWince::registerBaseCodecs(addToTextCodecMap); 236 #endif 237 } 238 239 static void extendTextCodecMaps() 240 { 241 #if USE(ICU_UNICODE) 242 TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); 243 TextCodecICU::registerExtendedCodecs(addToTextCodecMap); 244 #endif 245 246 #if USE(QT4_UNICODE) 247 TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); 248 TextCodecQt::registerCodecs(addToTextCodecMap); 249 #endif 250 251 #if PLATFORM(MAC) 252 TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); 253 TextCodecMac::registerCodecs(addToTextCodecMap); 254 #endif 255 256 #if USE(GLIB_UNICODE) 257 TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap); 258 TextCodecGtk::registerExtendedCodecs(addToTextCodecMap); 259 #endif 260 261 #if OS(WINCE) && !PLATFORM(QT) 262 TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap); 263 TextCodecWince::registerExtendedCodecs(addToTextCodecMap); 264 #endif 265 266 pruneBlacklistedCodecs(); 267 } 268 269 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding) 270 { 271 MutexLocker lock(encodingRegistryMutex()); 272 273 ASSERT(textCodecMap); 274 TextCodecFactory factory = textCodecMap->get(encoding.name()); 275 ASSERT(factory.function); 276 return factory.function(encoding, factory.additionalData); 277 } 278 279 const char* atomicCanonicalTextEncodingName(const char* name) 280 { 281 if (!name || !name[0]) 282 return 0; 283 if (!textEncodingNameMap) 284 buildBaseTextCodecMaps(); 285 286 MutexLocker lock(encodingRegistryMutex()); 287 288 if (const char* atomicName = textEncodingNameMap->get(name)) 289 return atomicName; 290 if (didExtendTextCodecMaps) 291 return 0; 292 extendTextCodecMaps(); 293 didExtendTextCodecMaps = true; 294 return textEncodingNameMap->get(name); 295 } 296 297 const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length) 298 { 299 char buffer[maxEncodingNameLength + 1]; 300 size_t j = 0; 301 for (size_t i = 0; i < length; ++i) { 302 UChar c = characters[i]; 303 if (isASCIIAlphanumeric(c)) { 304 if (j == maxEncodingNameLength) 305 return 0; 306 buffer[j++] = c; 307 } 308 } 309 buffer[j] = 0; 310 return atomicCanonicalTextEncodingName(buffer); 311 } 312 313 bool noExtendedTextEncodingNameUsed() 314 { 315 // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value. 316 return !didExtendTextCodecMaps; 317 } 318 319 } // namespace WebCore 320