1 /* 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "FontUtilsChromiumWin.h" 33 34 #include <limits> 35 36 #include "PlatformString.h" 37 #include "UniscribeHelper.h" 38 #include <unicode/locid.h> 39 #include <unicode/uchar.h> 40 #include <wtf/HashMap.h> 41 #include <wtf/text/StringHash.h> 42 43 namespace WebCore { 44 45 namespace { 46 47 bool isFontPresent(const UChar* fontName) 48 { 49 HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50 fontName); 51 if (!hfont) 52 return false; 53 HDC dc = GetDC(0); 54 HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont)); 55 WCHAR actualFontName[LF_FACESIZE]; 56 GetTextFace(dc, LF_FACESIZE, actualFontName); 57 actualFontName[LF_FACESIZE - 1] = 0; 58 SelectObject(dc, oldFont); 59 DeleteObject(hfont); 60 ReleaseDC(0, dc); 61 // We don't have to worry about East Asian fonts with locale-dependent 62 // names here for now. 63 return !wcscmp(fontName, actualFontName); 64 } 65 66 // A simple mapping from UScriptCode to family name. This is a sparse array, 67 // which works well since the range of UScriptCode values is small. 68 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; 69 70 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap) 71 { 72 struct FontMap { 73 UScriptCode script; 74 const UChar* family; 75 }; 76 77 static const FontMap fontMap[] = { 78 {USCRIPT_LATIN, L"times new roman"}, 79 {USCRIPT_GREEK, L"times new roman"}, 80 {USCRIPT_CYRILLIC, L"times new roman"}, 81 // FIXME: Consider trying new Vista fonts before XP fonts for CJK. 82 // Some Vista users do want to use Vista cleartype CJK fonts. If we 83 // did, the results of tests with CJK characters would have to be 84 // regenerated for Vista. 85 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, 86 {USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, 87 {USCRIPT_HIRAGANA, L"ms pgothic"}, 88 {USCRIPT_KATAKANA, L"ms pgothic"}, 89 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, 90 {USCRIPT_HANGUL, L"gulim"}, 91 {USCRIPT_THAI, L"tahoma"}, 92 {USCRIPT_HEBREW, L"david"}, 93 {USCRIPT_ARABIC, L"tahoma"}, 94 {USCRIPT_DEVANAGARI, L"mangal"}, 95 {USCRIPT_BENGALI, L"vrinda"}, 96 {USCRIPT_GURMUKHI, L"raavi"}, 97 {USCRIPT_GUJARATI, L"shruti"}, 98 {USCRIPT_TAMIL, L"latha"}, 99 {USCRIPT_TELUGU, L"gautami"}, 100 {USCRIPT_KANNADA, L"tunga"}, 101 {USCRIPT_GEORGIAN, L"sylfaen"}, 102 {USCRIPT_ARMENIAN, L"sylfaen"}, 103 {USCRIPT_THAANA, L"mv boli"}, 104 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, 105 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, 106 {USCRIPT_MONGOLIAN, L"mongolian balti"}, 107 // For USCRIPT_COMMON, we map blocks to scripts when 108 // that makes sense. 109 }; 110 111 struct ScriptToFontFamilies { 112 UScriptCode script; 113 const UChar** families; 114 }; 115 116 // Kartika on Vista or earlier lacks the support for Chillu 117 // letters added to Unicode 5.1. 118 // Try AnjaliOldLipi (a very widely used Malaylalam font with the full 119 // Unicode 5.x support) before falling back to Kartika. 120 static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0}; 121 // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better 122 // with Latin and looks better/larger for the same size. 123 static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0}; 124 // For the following 6 scripts, two or fonts are listed. The fonts in 125 // the 1st slot are not available on Windows XP. To support these 126 // scripts on XP, listed in the rest of slots are widely used 127 // fonts. 128 static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0}; 129 static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0}; 130 static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0}; 131 static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0}; 132 static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0}; 133 static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0}; 134 // http://www.bethmardutho.org/support/meltho/download/index.php 135 static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0}; 136 // No Myanmar/Burmese font is shipped with Windows, yet. Try a few 137 // widely available/used ones that supports Unicode 5.1 or later. 138 static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0}; 139 140 static const ScriptToFontFamilies scriptToFontFamilies[] = { 141 {USCRIPT_MALAYALAM, malayalamFonts}, 142 {USCRIPT_KHMER, khmerFonts}, 143 {USCRIPT_ETHIOPIC, ethiopicFonts}, 144 {USCRIPT_ORIYA, oriyaFonts}, 145 {USCRIPT_LAO, laoFonts}, 146 {USCRIPT_TIBETAN, tibetanFonts}, 147 {USCRIPT_SINHALA, sinhalaFonts}, 148 {USCRIPT_YI, yiFonts}, 149 {USCRIPT_SYRIAC, syriacFonts}, 150 {USCRIPT_MYANMAR, myanmarFonts}, 151 }; 152 153 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) 154 scriptFontMap[fontMap[i].script] = fontMap[i].family; 155 156 // FIXME: Instead of scanning the hard-coded list, we have to 157 // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts 158 // when it's possible (e.g. using OS/2 table). If we do that, this 159 // had better be pulled out of here. 160 for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) { 161 UScriptCode script = scriptToFontFamilies[i].script; 162 scriptFontMap[script] = 0; 163 const UChar** familyPtr = scriptToFontFamilies[i].families; 164 while (*familyPtr) { 165 if (isFontPresent(*familyPtr)) { 166 scriptFontMap[script] = *familyPtr; 167 break; 168 } 169 ++familyPtr; 170 } 171 } 172 173 // Initialize the locale-dependent mapping. 174 // Since Chrome synchronizes the ICU default locale with its UI locale, 175 // this ICU locale tells the current UI locale of Chrome. 176 icu::Locale locale = icu::Locale::getDefault(); 177 const UChar* localeFamily = 0; 178 if (locale == icu::Locale::getJapanese()) 179 localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; 180 else if (locale == icu::Locale::getKorean()) 181 localeFamily = scriptFontMap[USCRIPT_HANGUL]; 182 else if (locale == icu::Locale::getTraditionalChinese()) 183 localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN]; 184 else { 185 // For other locales, use the simplified Chinese font for Han. 186 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; 187 } 188 if (localeFamily) 189 scriptFontMap[USCRIPT_HAN] = localeFamily; 190 } 191 192 // There are a lot of characters in USCRIPT_COMMON that can be covered 193 // by fonts for scripts closely related to them. See 194 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] 195 // FIXME: make this more efficient with a wider coverage 196 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) 197 { 198 UBlockCode block = ublock_getCode(ucs4); 199 switch (block) { 200 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: 201 return USCRIPT_HAN; 202 case UBLOCK_HIRAGANA: 203 case UBLOCK_KATAKANA: 204 return USCRIPT_HIRAGANA; 205 case UBLOCK_ARABIC: 206 return USCRIPT_ARABIC; 207 case UBLOCK_THAI: 208 return USCRIPT_THAI; 209 case UBLOCK_GREEK: 210 return USCRIPT_GREEK; 211 case UBLOCK_DEVANAGARI: 212 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari 213 // font for now although they're used by other scripts as well. 214 // Without a context, we can't do any better. 215 return USCRIPT_DEVANAGARI; 216 case UBLOCK_ARMENIAN: 217 return USCRIPT_ARMENIAN; 218 case UBLOCK_GEORGIAN: 219 return USCRIPT_GEORGIAN; 220 case UBLOCK_KANNADA: 221 return USCRIPT_KANNADA; 222 default: 223 return USCRIPT_COMMON; 224 } 225 } 226 227 UScriptCode getScript(int ucs4) 228 { 229 UErrorCode err = U_ZERO_ERROR; 230 UScriptCode script = uscript_getScript(ucs4, &err); 231 // If script is invalid, common or inherited or there's an error, 232 // infer a script based on the unicode block of a character. 233 if (script <= USCRIPT_INHERITED || U_FAILURE(err)) 234 script = getScriptBasedOnUnicodeBlock(ucs4); 235 return script; 236 } 237 238 const int kUndefinedAscent = std::numeric_limits<int>::min(); 239 240 // Given an HFONT, return the ascent. If GetTextMetrics fails, 241 // kUndefinedAscent is returned, instead. 242 int getAscent(HFONT hfont) 243 { 244 HDC dc = GetDC(0); 245 HGDIOBJ oldFont = SelectObject(dc, hfont); 246 TEXTMETRIC tm; 247 BOOL gotMetrics = GetTextMetrics(dc, &tm); 248 SelectObject(dc, oldFont); 249 ReleaseDC(0, dc); 250 return gotMetrics ? tm.tmAscent : kUndefinedAscent; 251 } 252 253 WORD getSpaceGlyph(HFONT hfont) 254 { 255 HDC dc = GetDC(0); 256 HGDIOBJ oldFont = SelectObject(dc, hfont); 257 WCHAR space = L' '; 258 WORD spaceGlyph = 0; 259 GetGlyphIndices(dc, &space, 1, &spaceGlyph, 0); 260 SelectObject(dc, oldFont); 261 ReleaseDC(0, dc); 262 return spaceGlyph; 263 } 264 265 struct FontData { 266 FontData() 267 : hfont(0) 268 , ascent(kUndefinedAscent) 269 , scriptCache(0) 270 , spaceGlyph(0) 271 { 272 } 273 274 HFONT hfont; 275 int ascent; 276 mutable SCRIPT_CACHE scriptCache; 277 WORD spaceGlyph; 278 }; 279 280 // Again, using hash_map does not earn us much here. page_cycler_test intl2 281 // gave us a 'better' result with map than with hash_map even though they're 282 // well-within 1-sigma of each other so that the difference is not significant. 283 // On the other hand, some pages in intl2 seem to take longer to load with map 284 // in the 1st pass. Need to experiment further. 285 typedef HashMap<String, FontData> FontDataCache; 286 287 } // namespace 288 289 // FIXME: this is font fallback code version 0.1 290 // - Cover all the scripts 291 // - Get the default font for each script/generic family from the 292 // preference instead of hardcoding in the source. 293 // (at least, read values from the registry for IE font settings). 294 // - Support generic families (from FontDescription) 295 // - If the default font for a script is not available, 296 // try some more fonts known to support it. Finally, we can 297 // use EnumFontFamilies or similar APIs to come up with a list of 298 // fonts supporting the script and cache the result. 299 // - Consider using UnicodeSet (or UnicodeMap) converted from 300 // GLYPHSET (BMP) or directly read from truetype cmap tables to 301 // keep track of which character is supported by which font 302 // - Update script_font_cache in response to WM_FONTCHANGE 303 304 const UChar* getFontFamilyForScript(UScriptCode script, 305 FontDescription::GenericFamilyType generic) 306 { 307 static ScriptToFontMap scriptFontMap; 308 static bool initialized = false; 309 if (!initialized) { 310 initializeScriptFontMap(scriptFontMap); 311 initialized = true; 312 } 313 if (script == USCRIPT_INVALID_CODE) 314 return 0; 315 ASSERT(script < USCRIPT_CODE_LIMIT); 316 return scriptFontMap[script]; 317 } 318 319 // FIXME: 320 // - Handle 'Inherited', 'Common' and 'Unknown' 321 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) 322 // For 'Inherited' and 'Common', perhaps we need to 323 // accept another parameter indicating the previous family 324 // and just return it. 325 // - All the characters (or characters up to the point a single 326 // font can cover) need to be taken into account 327 const UChar* getFallbackFamily(const UChar* characters, 328 int length, 329 FontDescription::GenericFamilyType generic, 330 UChar32* charChecked, 331 UScriptCode* scriptChecked) 332 { 333 ASSERT(characters && characters[0] && length > 0); 334 UScriptCode script = USCRIPT_COMMON; 335 336 // Sometimes characters common to script (e.g. space) is at 337 // the beginning of a string so that we need to skip them 338 // to get a font required to render the string. 339 int i = 0; 340 UChar32 ucs4 = 0; 341 while (i < length && script == USCRIPT_COMMON) { 342 U16_NEXT(characters, i, length, ucs4); 343 script = getScript(ucs4); 344 } 345 346 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for 347 // Han (determined in a locale-dependent way above). Full-width ASCII 348 // characters are rather widely used in Japanese and Chinese documents and 349 // they're fully covered by Chinese, Japanese and Korean fonts. 350 if (0xFF00 < ucs4 && ucs4 < 0xFF5F) 351 script = USCRIPT_HAN; 352 353 if (script == USCRIPT_COMMON) 354 script = getScriptBasedOnUnicodeBlock(ucs4); 355 356 const UChar* family = getFontFamilyForScript(script, generic); 357 // Another lame work-around to cover non-BMP characters. 358 // If the font family for script is not found or the character is 359 // not in BMP (> U+FFFF), we resort to the hard-coded list of 360 // fallback fonts for now. 361 if (!family || ucs4 > 0xFFFF) { 362 int plane = ucs4 >> 16; 363 switch (plane) { 364 case 1: 365 family = L"code2001"; 366 break; 367 case 2: 368 // Use a Traditional Chinese ExtB font if in Traditional Chinese locale. 369 // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese 370 // fonts do support a small subset of ExtB (that are included in JIS X 0213), 371 // but its coverage is rather sparse. 372 // Eventually, this should be controlled by lang/xml:lang. 373 if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese()) 374 family = L"pmingliu-extb"; 375 else 376 family = L"simsun-extb"; 377 break; 378 default: 379 family = L"lucida sans unicode"; 380 } 381 } 382 383 if (charChecked) 384 *charChecked = ucs4; 385 if (scriptChecked) 386 *scriptChecked = script; 387 return family; 388 } 389 390 // Be aware that this is not thread-safe. 391 bool getDerivedFontData(const UChar* family, 392 int style, 393 LOGFONT* logfont, 394 int* ascent, 395 HFONT* hfont, 396 SCRIPT_CACHE** scriptCache, 397 WORD* spaceGlyph) 398 { 399 ASSERT(logfont); 400 ASSERT(family); 401 ASSERT(*family); 402 403 // It does not matter that we leak font data when we exit. 404 static FontDataCache fontDataCache; 405 406 // FIXME: This comes up pretty high in the profile so that 407 // we need to measure whether using SHA256 (after coercing all the 408 // fields to char*) is faster than String::format. 409 String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family); 410 FontDataCache::iterator iter = fontDataCache.find(fontKey); 411 FontData* derived; 412 if (iter == fontDataCache.end()) { 413 ASSERT(wcslen(family) < LF_FACESIZE); 414 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); 415 // FIXME: CreateFontIndirect always comes up with 416 // a font even if there's no font matching the name. Need to 417 // check it against what we actually want (as is done in 418 // FontCacheWin.cpp) 419 pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData()); 420 derived = &entry.first->second; 421 derived->hfont = CreateFontIndirect(logfont); 422 // GetAscent may return kUndefinedAscent, but we still want to 423 // cache it so that we won't have to call CreateFontIndirect once 424 // more for HFONT next time. 425 derived->ascent = getAscent(derived->hfont); 426 derived->spaceGlyph = getSpaceGlyph(derived->hfont); 427 } else { 428 derived = &iter->second; 429 // Last time, GetAscent failed so that only HFONT was 430 // cached. Try once more assuming that TryPreloadFont 431 // was called by a caller between calls. 432 if (kUndefinedAscent == derived->ascent) 433 derived->ascent = getAscent(derived->hfont); 434 } 435 *hfont = derived->hfont; 436 *ascent = derived->ascent; 437 *scriptCache = &(derived->scriptCache); 438 *spaceGlyph = derived->spaceGlyph; 439 return *ascent != kUndefinedAscent; 440 } 441 442 int getStyleFromLogfont(const LOGFONT* logfont) 443 { 444 // FIXME: consider defining UNDEFINED or INVALID for style and 445 // returning it when logfont is 0 446 if (!logfont) { 447 ASSERT_NOT_REACHED(); 448 return FontStyleNormal; 449 } 450 return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) | 451 (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) | 452 (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal); 453 } 454 455 } // namespace WebCore 456