1 /* 2 * Copyright (c) 2006, 2007, 2008, Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "FontUtilsChromiumWin.h" 33 34 #include <limits> 35 36 #include "PlatformString.h" 37 #include "StringHash.h" 38 #include "UniscribeHelper.h" 39 #include <unicode/locid.h> 40 #include <unicode/uchar.h> 41 #include <wtf/HashMap.h> 42 43 namespace WebCore { 44 45 namespace { 46 47 // A simple mapping from UScriptCode to family name. This is a sparse array, 48 // which works well since the range of UScriptCode values is small. 49 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; 50 51 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap) 52 { 53 struct FontMap { 54 UScriptCode script; 55 const UChar* family; 56 }; 57 58 const static FontMap fontMap[] = { 59 {USCRIPT_LATIN, L"times new roman"}, 60 {USCRIPT_GREEK, L"times new roman"}, 61 {USCRIPT_CYRILLIC, L"times new roman"}, 62 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, 63 {USCRIPT_HIRAGANA, L"ms pgothic"}, 64 {USCRIPT_KATAKANA, L"ms pgothic"}, 65 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, 66 {USCRIPT_HANGUL, L"gulim"}, 67 {USCRIPT_THAI, L"tahoma"}, 68 {USCRIPT_HEBREW, L"david"}, 69 {USCRIPT_ARABIC, L"tahoma"}, 70 {USCRIPT_DEVANAGARI, L"mangal"}, 71 {USCRIPT_BENGALI, L"vrinda"}, 72 {USCRIPT_GURMUKHI, L"raavi"}, 73 {USCRIPT_GUJARATI, L"shruti"}, 74 {USCRIPT_ORIYA, L"kalinga"}, 75 {USCRIPT_TAMIL, L"latha"}, 76 {USCRIPT_TELUGU, L"gautami"}, 77 {USCRIPT_KANNADA, L"tunga"}, 78 {USCRIPT_MALAYALAM, L"kartika"}, 79 {USCRIPT_LAO, L"dokchampa"}, 80 {USCRIPT_TIBETAN, L"microsoft himalaya"}, 81 {USCRIPT_GEORGIAN, L"sylfaen"}, 82 {USCRIPT_ARMENIAN, L"sylfaen"}, 83 {USCRIPT_ETHIOPIC, L"nyala"}, 84 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, 85 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, 86 {USCRIPT_YI, L"microsoft yi balti"}, 87 {USCRIPT_SINHALA, L"iskoola pota"}, 88 {USCRIPT_SYRIAC, L"estrangelo edessa"}, 89 {USCRIPT_KHMER, L"daunpenh"}, 90 {USCRIPT_THAANA, L"mv boli"}, 91 {USCRIPT_MONGOLIAN, L"mongolian balti"}, 92 {USCRIPT_MYANMAR, L"padauk"}, 93 // For USCRIPT_COMMON, we map blocks to scripts when 94 // that makes sense. 95 }; 96 97 for (int i = 0; i < sizeof(fontMap) / sizeof(fontMap[0]); ++i) 98 scriptFontMap[fontMap[i].script] = fontMap[i].family; 99 100 // Initialize the locale-dependent mapping. 101 // Since Chrome synchronizes the ICU default locale with its UI locale, 102 // this ICU locale tells the current UI locale of Chrome. 103 icu::Locale locale = icu::Locale::getDefault(); 104 const UChar* localeFamily = 0; 105 if (locale == icu::Locale::getJapanese()) 106 localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; 107 else if (locale == icu::Locale::getKorean()) 108 localeFamily = scriptFontMap[USCRIPT_HANGUL]; 109 else { 110 // Use Simplified Chinese font for all other locales including 111 // Traditional Chinese because Simsun (SC font) has a wider 112 // coverage (covering both SC and TC) than PMingLiu (TC font). 113 // Note that |fontMap| does not have a separate entry for 114 // USCRIPT_TRADITIONAL_HAN for that reason. 115 // This also speeds up the TC version of Chrome when rendering SC 116 // pages. 117 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; 118 } 119 if (localeFamily) 120 scriptFontMap[USCRIPT_HAN] = localeFamily; 121 } 122 123 // There are a lot of characters in USCRIPT_COMMON that can be covered 124 // by fonts for scripts closely related to them. See 125 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] 126 // FIXME: make this more efficient with a wider coverage 127 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) 128 { 129 UBlockCode block = ublock_getCode(ucs4); 130 switch (block) { 131 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: 132 return USCRIPT_HAN; 133 case UBLOCK_HIRAGANA: 134 case UBLOCK_KATAKANA: 135 return USCRIPT_HIRAGANA; 136 case UBLOCK_ARABIC: 137 return USCRIPT_ARABIC; 138 case UBLOCK_THAI: 139 return USCRIPT_THAI; 140 case UBLOCK_GREEK: 141 return USCRIPT_GREEK; 142 case UBLOCK_DEVANAGARI: 143 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari 144 // font for now although they're used by other scripts as well. 145 // Without a context, we can't do any better. 146 return USCRIPT_DEVANAGARI; 147 case UBLOCK_ARMENIAN: 148 return USCRIPT_ARMENIAN; 149 case UBLOCK_GEORGIAN: 150 return USCRIPT_GEORGIAN; 151 case UBLOCK_KANNADA: 152 return USCRIPT_KANNADA; 153 default: 154 return USCRIPT_COMMON; 155 } 156 } 157 158 UScriptCode getScript(int ucs4) 159 { 160 UErrorCode err = U_ZERO_ERROR; 161 UScriptCode script = uscript_getScript(ucs4, &err); 162 // If script is invalid, common or inherited or there's an error, 163 // infer a script based on the unicode block of a character. 164 if (script <= USCRIPT_INHERITED || U_FAILURE(err)) 165 script = getScriptBasedOnUnicodeBlock(ucs4); 166 return script; 167 } 168 169 const int kUndefinedAscent = std::numeric_limits<int>::min(); 170 171 // Given an HFONT, return the ascent. If GetTextMetrics fails, 172 // kUndefinedAscent is returned, instead. 173 int getAscent(HFONT hfont) 174 { 175 HDC dc = GetDC(0); 176 HGDIOBJ oldFont = SelectObject(dc, hfont); 177 TEXTMETRIC tm; 178 BOOL gotMetrics = GetTextMetrics(dc, &tm); 179 SelectObject(dc, oldFont); 180 ReleaseDC(0, dc); 181 return gotMetrics ? tm.tmAscent : kUndefinedAscent; 182 } 183 184 struct FontData { 185 FontData() 186 : hfont(0) 187 , ascent(kUndefinedAscent) 188 , scriptCache(0) 189 { 190 } 191 192 HFONT hfont; 193 int ascent; 194 mutable SCRIPT_CACHE scriptCache; 195 }; 196 197 // Again, using hash_map does not earn us much here. page_cycler_test intl2 198 // gave us a 'better' result with map than with hash_map even though they're 199 // well-within 1-sigma of each other so that the difference is not significant. 200 // On the other hand, some pages in intl2 seem to take longer to load with map 201 // in the 1st pass. Need to experiment further. 202 typedef HashMap<String, FontData> FontDataCache; 203 204 } // namespace 205 206 // FIXME: this is font fallback code version 0.1 207 // - Cover all the scripts 208 // - Get the default font for each script/generic family from the 209 // preference instead of hardcoding in the source. 210 // (at least, read values from the registry for IE font settings). 211 // - Support generic families (from FontDescription) 212 // - If the default font for a script is not available, 213 // try some more fonts known to support it. Finally, we can 214 // use EnumFontFamilies or similar APIs to come up with a list of 215 // fonts supporting the script and cache the result. 216 // - Consider using UnicodeSet (or UnicodeMap) converted from 217 // GLYPHSET (BMP) or directly read from truetype cmap tables to 218 // keep track of which character is supported by which font 219 // - Update script_font_cache in response to WM_FONTCHANGE 220 221 const UChar* getFontFamilyForScript(UScriptCode script, 222 FontDescription::GenericFamilyType generic) 223 { 224 static ScriptToFontMap scriptFontMap; 225 static bool initialized = false; 226 if (!initialized) { 227 initializeScriptFontMap(scriptFontMap); 228 initialized = true; 229 } 230 if (script == USCRIPT_INVALID_CODE) 231 return 0; 232 ASSERT(script < USCRIPT_CODE_LIMIT); 233 return scriptFontMap[script]; 234 } 235 236 // FIXME: 237 // - Handle 'Inherited', 'Common' and 'Unknown' 238 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) 239 // For 'Inherited' and 'Common', perhaps we need to 240 // accept another parameter indicating the previous family 241 // and just return it. 242 // - All the characters (or characters up to the point a single 243 // font can cover) need to be taken into account 244 const UChar* getFallbackFamily(const UChar* characters, 245 int length, 246 FontDescription::GenericFamilyType generic, 247 UChar32* charChecked, 248 UScriptCode* scriptChecked) 249 { 250 ASSERT(characters && characters[0] && length > 0); 251 UScriptCode script = USCRIPT_COMMON; 252 253 // Sometimes characters common to script (e.g. space) is at 254 // the beginning of a string so that we need to skip them 255 // to get a font required to render the string. 256 int i = 0; 257 UChar32 ucs4 = 0; 258 while (i < length && script == USCRIPT_COMMON) { 259 U16_NEXT(characters, i, length, ucs4); 260 script = getScript(ucs4); 261 } 262 263 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for 264 // Han (determined in a locale-dependent way above). Full-width ASCII 265 // characters are rather widely used in Japanese and Chinese documents and 266 // they're fully covered by Chinese, Japanese and Korean fonts. 267 if (0xFF00 < ucs4 && ucs4 < 0xFF5F) 268 script = USCRIPT_HAN; 269 270 if (script == USCRIPT_COMMON) 271 script = getScriptBasedOnUnicodeBlock(ucs4); 272 273 // Another lame work-around to cover non-BMP characters. 274 const UChar* family = getFontFamilyForScript(script, generic); 275 if (!family) { 276 int plane = ucs4 >> 16; 277 switch (plane) { 278 case 1: 279 family = L"code2001"; 280 break; 281 case 2: 282 family = L"simsun-extb"; 283 break; 284 default: 285 family = L"lucida sans unicode"; 286 } 287 } 288 289 if (charChecked) 290 *charChecked = ucs4; 291 if (scriptChecked) 292 *scriptChecked = script; 293 return family; 294 } 295 296 // Be aware that this is not thread-safe. 297 bool getDerivedFontData(const UChar* family, 298 int style, 299 LOGFONT* logfont, 300 int* ascent, 301 HFONT* hfont, 302 SCRIPT_CACHE** scriptCache) 303 { 304 ASSERT(logfont); 305 ASSERT(family); 306 ASSERT(*family); 307 308 // It does not matter that we leak font data when we exit. 309 static FontDataCache fontDataCache; 310 311 // FIXME: This comes up pretty high in the profile so that 312 // we need to measure whether using SHA256 (after coercing all the 313 // fields to char*) is faster than String::format. 314 String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family); 315 FontDataCache::iterator iter = fontDataCache.find(fontKey); 316 FontData* derived; 317 if (iter == fontDataCache.end()) { 318 ASSERT(wcslen(family) < LF_FACESIZE); 319 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); 320 // FIXME: CreateFontIndirect always comes up with 321 // a font even if there's no font matching the name. Need to 322 // check it against what we actually want (as is done in 323 // FontCacheWin.cpp) 324 pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData()); 325 derived = &entry.first->second; 326 derived->hfont = CreateFontIndirect(logfont); 327 // GetAscent may return kUndefinedAscent, but we still want to 328 // cache it so that we won't have to call CreateFontIndirect once 329 // more for HFONT next time. 330 derived->ascent = getAscent(derived->hfont); 331 } else { 332 derived = &iter->second; 333 // Last time, GetAscent failed so that only HFONT was 334 // cached. Try once more assuming that TryPreloadFont 335 // was called by a caller between calls. 336 if (kUndefinedAscent == derived->ascent) 337 derived->ascent = getAscent(derived->hfont); 338 } 339 *hfont = derived->hfont; 340 *ascent = derived->ascent; 341 *scriptCache = &(derived->scriptCache); 342 return *ascent != kUndefinedAscent; 343 } 344 345 int getStyleFromLogfont(const LOGFONT* logfont) 346 { 347 // FIXME: consider defining UNDEFINED or INVALID for style and 348 // returning it when logfont is 0 349 if (!logfont) { 350 ASSERT_NOT_REACHED(); 351 return FontStyleNormal; 352 } 353 return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) | 354 (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) | 355 (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal); 356 } 357 358 } // namespace WebCore 359