1 /* 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "platform/fonts/win/FontFallbackWin.h" 33 34 #include "SkFontMgr.h" 35 #include "SkTypeface.h" 36 #include "wtf/HashMap.h" 37 #include "wtf/text/StringHash.h" 38 #include "wtf/text/WTFString.h" 39 #include <limits> 40 #include <unicode/locid.h> 41 #include <unicode/uchar.h> 42 43 namespace blink { 44 45 namespace { 46 47 static inline bool isFontPresent(const UChar* fontName, SkFontMgr* fontManager) 48 { 49 String family = fontName; 50 RefPtr<SkTypeface> tf = adoptRef(fontManager->legacyCreateTypeface(family.utf8().data(), SkTypeface::kNormal)); 51 if (!tf) 52 return false; 53 54 SkTypeface::LocalizedStrings* actualFamilies = tf->createFamilyNameIterator(); 55 bool matchesRequestedFamily = false; 56 SkTypeface::LocalizedString actualFamily; 57 while (actualFamilies->next(&actualFamily)) { 58 if (equalIgnoringCase(family, AtomicString::fromUTF8(actualFamily.fString.c_str()))) { 59 matchesRequestedFamily = true; 60 break; 61 } 62 } 63 actualFamilies->unref(); 64 65 return matchesRequestedFamily; 66 } 67 68 // A simple mapping from UScriptCode to family name. This is a sparse array, 69 // which works well since the range of UScriptCode values is small. 70 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; 71 72 void initializeScriptMonospaceFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager) 73 { 74 struct FontMap { 75 UScriptCode script; 76 const UChar* family; 77 }; 78 79 static const FontMap fontMap[] = { 80 { USCRIPT_HEBREW, L"courier new" }, 81 { USCRIPT_ARABIC, L"courier new" }, 82 }; 83 84 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) 85 scriptFontMap[fontMap[i].script] = fontMap[i].family; 86 } 87 88 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager) 89 { 90 struct FontMap { 91 UScriptCode script; 92 const UChar* family; 93 }; 94 95 static const FontMap fontMap[] = { 96 {USCRIPT_LATIN, L"times new roman"}, 97 {USCRIPT_GREEK, L"times new roman"}, 98 {USCRIPT_CYRILLIC, L"times new roman"}, 99 // FIXME: Consider trying new Vista fonts before XP fonts for CJK. 100 // Some Vista users do want to use Vista cleartype CJK fonts. If we 101 // did, the results of tests with CJK characters would have to be 102 // regenerated for Vista. 103 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, 104 {USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, 105 {USCRIPT_HIRAGANA, L"ms pgothic"}, 106 {USCRIPT_KATAKANA, L"ms pgothic"}, 107 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, 108 {USCRIPT_HANGUL, L"gulim"}, 109 {USCRIPT_THAI, L"tahoma"}, 110 {USCRIPT_HEBREW, L"david"}, 111 {USCRIPT_ARABIC, L"tahoma"}, 112 {USCRIPT_DEVANAGARI, L"mangal"}, 113 {USCRIPT_BENGALI, L"vrinda"}, 114 {USCRIPT_GURMUKHI, L"raavi"}, 115 {USCRIPT_GUJARATI, L"shruti"}, 116 {USCRIPT_TAMIL, L"latha"}, 117 {USCRIPT_TELUGU, L"gautami"}, 118 {USCRIPT_KANNADA, L"tunga"}, 119 {USCRIPT_GEORGIAN, L"sylfaen"}, 120 {USCRIPT_ARMENIAN, L"sylfaen"}, 121 {USCRIPT_THAANA, L"mv boli"}, 122 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, 123 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, 124 {USCRIPT_MONGOLIAN, L"mongolian balti"}, 125 // For USCRIPT_COMMON, we map blocks to scripts when 126 // that makes sense. 127 }; 128 129 struct ScriptToFontFamilies { 130 UScriptCode script; 131 const UChar** families; 132 }; 133 134 // Kartika on Vista or earlier lacks the support for Chillu 135 // letters added to Unicode 5.1. 136 // Try AnjaliOldLipi (a very widely used Malaylalam font with the full 137 // Unicode 5.x support) before falling back to Kartika. 138 static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0}; 139 // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better 140 // with Latin and looks better/larger for the same size. 141 static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0}; 142 // For the following 6 scripts, two or fonts are listed. The fonts in 143 // the 1st slot are not available on Windows XP. To support these 144 // scripts on XP, listed in the rest of slots are widely used 145 // fonts. 146 static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0}; 147 static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0}; 148 static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0}; 149 static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0}; 150 static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0}; 151 static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0}; 152 // http://www.bethmardutho.org/support/meltho/download/index.php 153 static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0}; 154 // No Myanmar/Burmese font is shipped with Windows, yet. Try a few 155 // widely available/used ones that supports Unicode 5.1 or later. 156 static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0}; 157 158 static const ScriptToFontFamilies scriptToFontFamilies[] = { 159 {USCRIPT_MALAYALAM, malayalamFonts}, 160 {USCRIPT_KHMER, khmerFonts}, 161 {USCRIPT_ETHIOPIC, ethiopicFonts}, 162 {USCRIPT_ORIYA, oriyaFonts}, 163 {USCRIPT_LAO, laoFonts}, 164 {USCRIPT_TIBETAN, tibetanFonts}, 165 {USCRIPT_SINHALA, sinhalaFonts}, 166 {USCRIPT_YI, yiFonts}, 167 {USCRIPT_SYRIAC, syriacFonts}, 168 {USCRIPT_MYANMAR, myanmarFonts}, 169 }; 170 171 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) 172 scriptFontMap[fontMap[i].script] = fontMap[i].family; 173 174 // FIXME: Instead of scanning the hard-coded list, we have to 175 // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts 176 // when it's possible (e.g. using OS/2 table). If we do that, this 177 // had better be pulled out of here. 178 for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) { 179 UScriptCode script = scriptToFontFamilies[i].script; 180 scriptFontMap[script] = 0; 181 const UChar** familyPtr = scriptToFontFamilies[i].families; 182 while (*familyPtr) { 183 if (isFontPresent(*familyPtr, fontManager)) { 184 scriptFontMap[script] = *familyPtr; 185 break; 186 } 187 ++familyPtr; 188 } 189 } 190 191 // Initialize the locale-dependent mapping. 192 // Since Chrome synchronizes the ICU default locale with its UI locale, 193 // this ICU locale tells the current UI locale of Chrome. 194 icu::Locale locale = icu::Locale::getDefault(); 195 const UChar* localeFamily = 0; 196 if (locale == icu::Locale::getJapanese()) { 197 localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; 198 } else if (locale == icu::Locale::getKorean()) { 199 localeFamily = scriptFontMap[USCRIPT_HANGUL]; 200 } else if (locale == icu::Locale::getTraditionalChinese()) { 201 localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN]; 202 } else { 203 // For other locales, use the simplified Chinese font for Han. 204 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; 205 } 206 if (localeFamily) 207 scriptFontMap[USCRIPT_HAN] = localeFamily; 208 } 209 210 // There are a lot of characters in USCRIPT_COMMON that can be covered 211 // by fonts for scripts closely related to them. See 212 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] 213 // FIXME: make this more efficient with a wider coverage 214 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) 215 { 216 UBlockCode block = ublock_getCode(ucs4); 217 switch (block) { 218 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: 219 return USCRIPT_HAN; 220 case UBLOCK_HIRAGANA: 221 case UBLOCK_KATAKANA: 222 return USCRIPT_HIRAGANA; 223 case UBLOCK_ARABIC: 224 return USCRIPT_ARABIC; 225 case UBLOCK_THAI: 226 return USCRIPT_THAI; 227 case UBLOCK_GREEK: 228 return USCRIPT_GREEK; 229 case UBLOCK_DEVANAGARI: 230 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari 231 // font for now although they're used by other scripts as well. 232 // Without a context, we can't do any better. 233 return USCRIPT_DEVANAGARI; 234 case UBLOCK_ARMENIAN: 235 return USCRIPT_ARMENIAN; 236 case UBLOCK_GEORGIAN: 237 return USCRIPT_GEORGIAN; 238 case UBLOCK_KANNADA: 239 return USCRIPT_KANNADA; 240 default: 241 return USCRIPT_COMMON; 242 } 243 } 244 245 UScriptCode getScript(int ucs4) 246 { 247 UErrorCode err = U_ZERO_ERROR; 248 UScriptCode script = uscript_getScript(ucs4, &err); 249 // If script is invalid, common or inherited or there's an error, 250 // infer a script based on the unicode block of a character. 251 if (script <= USCRIPT_INHERITED || U_FAILURE(err)) 252 script = getScriptBasedOnUnicodeBlock(ucs4); 253 return script; 254 } 255 256 } // namespace 257 258 // FIXME: this is font fallback code version 0.1 259 // - Cover all the scripts 260 // - Get the default font for each script/generic family from the 261 // preference instead of hardcoding in the source. 262 // (at least, read values from the registry for IE font settings). 263 // - Support generic families (from FontDescription) 264 // - If the default font for a script is not available, 265 // try some more fonts known to support it. Finally, we can 266 // use EnumFontFamilies or similar APIs to come up with a list of 267 // fonts supporting the script and cache the result. 268 // - Consider using UnicodeSet (or UnicodeMap) converted from 269 // GLYPHSET (BMP) or directly read from truetype cmap tables to 270 // keep track of which character is supported by which font 271 // - Update script_font_cache in response to WM_FONTCHANGE 272 273 const UChar* getFontFamilyForScript(UScriptCode script, 274 FontDescription::GenericFamilyType generic, 275 SkFontMgr* fontManager) 276 { 277 static ScriptToFontMap scriptFontMap; 278 static ScriptToFontMap scriptMonospaceFontMap; 279 static bool initialized = false; 280 if (!initialized) { 281 initializeScriptFontMap(scriptFontMap, fontManager); 282 initializeScriptMonospaceFontMap(scriptMonospaceFontMap, fontManager); 283 initialized = true; 284 } 285 if (script == USCRIPT_INVALID_CODE) 286 return 0; 287 ASSERT(script < USCRIPT_CODE_LIMIT); 288 if (generic == FontDescription::MonospaceFamily && scriptMonospaceFontMap[script]) 289 return scriptMonospaceFontMap[script]; 290 return scriptFontMap[script]; 291 } 292 293 // FIXME: 294 // - Handle 'Inherited', 'Common' and 'Unknown' 295 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) 296 // For 'Inherited' and 'Common', perhaps we need to 297 // accept another parameter indicating the previous family 298 // and just return it. 299 // - All the characters (or characters up to the point a single 300 // font can cover) need to be taken into account 301 const UChar* getFallbackFamily(UChar32 character, 302 FontDescription::GenericFamilyType generic, 303 UScriptCode* scriptChecked, 304 SkFontMgr* fontManager) 305 { 306 ASSERT(character); 307 UScriptCode script = getScript(character); 308 309 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for 310 // Han (determined in a locale-dependent way above). Full-width ASCII 311 // characters are rather widely used in Japanese and Chinese documents and 312 // they're fully covered by Chinese, Japanese and Korean fonts. 313 if (0xFF00 < character && character < 0xFF5F) 314 script = USCRIPT_HAN; 315 316 if (script == USCRIPT_COMMON) 317 script = getScriptBasedOnUnicodeBlock(character); 318 319 const UChar* family = getFontFamilyForScript(script, generic, fontManager); 320 // Another lame work-around to cover non-BMP characters. 321 // If the font family for script is not found or the character is 322 // not in BMP (> U+FFFF), we resort to the hard-coded list of 323 // fallback fonts for now. 324 if (!family || character > 0xFFFF) { 325 int plane = character >> 16; 326 switch (plane) { 327 case 1: 328 family = L"code2001"; 329 break; 330 case 2: 331 // Use a Traditional Chinese ExtB font if in Traditional Chinese locale. 332 // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese 333 // fonts do support a small subset of ExtB (that are included in JIS X 0213), 334 // but its coverage is rather sparse. 335 // Eventually, this should be controlled by lang/xml:lang. 336 if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese()) 337 family = L"pmingliu-extb"; 338 else 339 family = L"simsun-extb"; 340 break; 341 default: 342 family = L"lucida sans unicode"; 343 } 344 } 345 346 if (scriptChecked) 347 *scriptChecked = script; 348 return family; 349 } 350 351 } // namespace blink 352