Home | History | Annotate | Download | only in win
      1 /*
      2  * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "platform/fonts/win/FontFallbackWin.h"
     33 
     34 #include "SkFontMgr.h"
     35 #include "SkTypeface.h"
     36 #include "wtf/HashMap.h"
     37 #include "wtf/text/StringHash.h"
     38 #include "wtf/text/WTFString.h"
     39 #include <limits>
     40 #include <unicode/locid.h>
     41 #include <unicode/uchar.h>
     42 
     43 namespace blink {
     44 
     45 namespace {
     46 
     47 static inline bool isFontPresent(const UChar* fontName, SkFontMgr* fontManager)
     48 {
     49     String family = fontName;
     50     RefPtr<SkTypeface> tf = adoptRef(fontManager->legacyCreateTypeface(family.utf8().data(), SkTypeface::kNormal));
     51     if (!tf)
     52         return false;
     53 
     54     SkTypeface::LocalizedStrings* actualFamilies = tf->createFamilyNameIterator();
     55     bool matchesRequestedFamily = false;
     56     SkTypeface::LocalizedString actualFamily;
     57     while (actualFamilies->next(&actualFamily)) {
     58         if (equalIgnoringCase(family, AtomicString::fromUTF8(actualFamily.fString.c_str()))) {
     59             matchesRequestedFamily = true;
     60             break;
     61         }
     62     }
     63     actualFamilies->unref();
     64 
     65     return matchesRequestedFamily;
     66 }
     67 
     68 // A simple table from UScriptCode to font family name, indexed directly by the script code.
     69 // It is a sparse array, which works well since the range of UScriptCode values is small.
     70 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
     71 
     72 void initializeScriptMonospaceFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
     73 {
     74     struct FontMap {
     75         UScriptCode script;
     76         const UChar* family;
     77     };
     78 
     79     static const FontMap fontMap[] = {
     80         { USCRIPT_HEBREW, L"courier new" },
     81         { USCRIPT_ARABIC, L"courier new" },
     82     };
     83 
     84     for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
     85         scriptFontMap[fontMap[i].script] = fontMap[i].family;
     86 }
     87 
     88 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap, SkFontMgr* fontManager)
     89 {
     90     struct FontMap {
     91         UScriptCode script;
     92         const UChar* family;
     93     };
     94 
     95     static const FontMap fontMap[] = {
     96         {USCRIPT_LATIN, L"times new roman"},
     97         {USCRIPT_GREEK, L"times new roman"},
     98         {USCRIPT_CYRILLIC, L"times new roman"},
     99         // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
    100         // Some Vista users do want to use Vista cleartype CJK fonts. If we
    101         // did, the results of tests with CJK characters would have to be
    102         // regenerated for Vista.
    103         {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
    104         {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
    105         {USCRIPT_HIRAGANA, L"ms pgothic"},
    106         {USCRIPT_KATAKANA, L"ms pgothic"},
    107         {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
    108         {USCRIPT_HANGUL, L"gulim"},
    109         {USCRIPT_THAI, L"tahoma"},
    110         {USCRIPT_HEBREW, L"david"},
    111         {USCRIPT_ARABIC, L"tahoma"},
    112         {USCRIPT_DEVANAGARI, L"mangal"},
    113         {USCRIPT_BENGALI, L"vrinda"},
    114         {USCRIPT_GURMUKHI, L"raavi"},
    115         {USCRIPT_GUJARATI, L"shruti"},
    116         {USCRIPT_TAMIL, L"latha"},
    117         {USCRIPT_TELUGU, L"gautami"},
    118         {USCRIPT_KANNADA, L"tunga"},
    119         {USCRIPT_GEORGIAN, L"sylfaen"},
    120         {USCRIPT_ARMENIAN, L"sylfaen"},
    121         {USCRIPT_THAANA, L"mv boli"},
    122         {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
    123         {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
    124         {USCRIPT_MONGOLIAN, L"mongolian balti"},
    125         // For USCRIPT_COMMON, we map blocks to scripts when
    126         // that makes sense.
    127     };
    128 
    129     struct ScriptToFontFamilies {
    130         UScriptCode script;
    131         const UChar** families;
    132     };
    133 
    134     // Kartika on Vista or earlier lacks the support for Chillu
    135     // letters added to Unicode 5.1.
    136     // Try AnjaliOldLipi (a very widely used Malaylalam font with the full
    137     // Unicode 5.x support) before falling back to Kartika.
    138     static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
    139     // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
    140     // with Latin and looks better/larger for the same size.
    141     static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
    142     // For the following 6 scripts, two or fonts are listed. The fonts in
    143     // the 1st slot are not available on Windows XP. To support these
    144     // scripts on XP, listed in the rest of slots are widely used
    145     // fonts.
    146     static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
    147     static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
    148     static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
    149     static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
    150     static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
    151     static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0};
    152     // http://www.bethmardutho.org/support/meltho/download/index.php
    153     static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
    154     // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
    155     // widely available/used ones that supports Unicode 5.1 or later.
    156     static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
    157 
    158     static const ScriptToFontFamilies scriptToFontFamilies[] = {
    159         {USCRIPT_MALAYALAM, malayalamFonts},
    160         {USCRIPT_KHMER, khmerFonts},
    161         {USCRIPT_ETHIOPIC, ethiopicFonts},
    162         {USCRIPT_ORIYA, oriyaFonts},
    163         {USCRIPT_LAO, laoFonts},
    164         {USCRIPT_TIBETAN, tibetanFonts},
    165         {USCRIPT_SINHALA, sinhalaFonts},
    166         {USCRIPT_YI, yiFonts},
    167         {USCRIPT_SYRIAC, syriacFonts},
    168         {USCRIPT_MYANMAR, myanmarFonts},
    169     };
    170 
    171     for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
    172         scriptFontMap[fontMap[i].script] = fontMap[i].family;
    173 
    174     // FIXME: Instead of scanning the hard-coded list, we have to
    175     // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
    176     // when it's possible (e.g. using OS/2 table). If we do that, this
    177     // had better be pulled out of here.
    178     for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
    179         UScriptCode script = scriptToFontFamilies[i].script;
    180         scriptFontMap[script] = 0;
    181         const UChar** familyPtr = scriptToFontFamilies[i].families;
    182         while (*familyPtr) {
    183             if (isFontPresent(*familyPtr, fontManager)) {
    184                 scriptFontMap[script] = *familyPtr;
    185                 break;
    186             }
    187             ++familyPtr;
    188         }
    189     }
    190 
    191     // Initialize the locale-dependent mapping.
    192     // Since Chrome synchronizes the ICU default locale with its UI locale,
    193     // this ICU locale tells the current UI locale of Chrome.
    194     icu::Locale locale = icu::Locale::getDefault();
    195     const UChar* localeFamily = 0;
    196     if (locale == icu::Locale::getJapanese()) {
    197         localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
    198     } else if (locale == icu::Locale::getKorean()) {
    199         localeFamily = scriptFontMap[USCRIPT_HANGUL];
    200     } else if (locale == icu::Locale::getTraditionalChinese()) {
    201         localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
    202     } else {
    203         // For other locales, use the simplified Chinese font for Han.
    204         localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
    205     }
    206     if (localeFamily)
    207         scriptFontMap[USCRIPT_HAN] = localeFamily;
    208 }
    209 
    210 // There are a lot of characters in USCRIPT_COMMON that can be covered
    211 // by fonts for scripts closely related to them. See
    212 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
    213 // FIXME: make this more efficient with a wider coverage
    214 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
    215 {
    216     UBlockCode block = ublock_getCode(ucs4);
    217     switch (block) {
    218     case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
    219         return USCRIPT_HAN;
    220     case UBLOCK_HIRAGANA:
    221     case UBLOCK_KATAKANA:
    222         return USCRIPT_HIRAGANA;
    223     case UBLOCK_ARABIC:
    224         return USCRIPT_ARABIC;
    225     case UBLOCK_THAI:
    226         return USCRIPT_THAI;
    227     case UBLOCK_GREEK:
    228         return USCRIPT_GREEK;
    229     case UBLOCK_DEVANAGARI:
    230         // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
    231         // font for now although they're used by other scripts as well.
    232         // Without a context, we can't do any better.
    233         return USCRIPT_DEVANAGARI;
    234     case UBLOCK_ARMENIAN:
    235         return USCRIPT_ARMENIAN;
    236     case UBLOCK_GEORGIAN:
    237         return USCRIPT_GEORGIAN;
    238     case UBLOCK_KANNADA:
    239         return USCRIPT_KANNADA;
    240     default:
    241         return USCRIPT_COMMON;
    242     }
    243 }
    244 
    245 UScriptCode getScript(int ucs4)
    246 {
    247     UErrorCode err = U_ZERO_ERROR;
    248     UScriptCode script = uscript_getScript(ucs4, &err);
    249     // If script is invalid, common or inherited or there's an error,
    250     // infer a script based on the unicode block of a character.
    251     if (script <= USCRIPT_INHERITED || U_FAILURE(err))
    252         script = getScriptBasedOnUnicodeBlock(ucs4);
    253     return script;
    254 }
    255 
    256 } // namespace
    257 
    258 // FIXME: this is font fallback code version 0.1
    259 //  - Cover all the scripts
    260 //  - Get the default font for each script/generic family from the
    261 //    preference instead of hardcoding in the source.
    262 //    (at least, read values from the registry for IE font settings).
    263 //  - Support generic families (from FontDescription)
    264 //  - If the default font for a script is not available,
    265 //    try some more fonts known to support it. Finally, we can
    266 //    use EnumFontFamilies or similar APIs to come up with a list of
    267 //    fonts supporting the script and cache the result.
    268 //  - Consider using UnicodeSet (or UnicodeMap) converted from
    269 //    GLYPHSET (BMP) or directly read from truetype cmap tables to
    270 //    keep track of which character is supported by which font
    271 //  - Update script_font_cache in response to WM_FONTCHANGE
    272 
    273 const UChar* getFontFamilyForScript(UScriptCode script,
    274     FontDescription::GenericFamilyType generic,
    275     SkFontMgr* fontManager)
    276 {
    277     static ScriptToFontMap scriptFontMap;
    278     static ScriptToFontMap scriptMonospaceFontMap;
    279     static bool initialized = false;
    280     if (!initialized) {
    281         initializeScriptFontMap(scriptFontMap, fontManager);
    282         initializeScriptMonospaceFontMap(scriptMonospaceFontMap, fontManager);
    283         initialized = true;
    284     }
    285     if (script == USCRIPT_INVALID_CODE)
    286         return 0;
    287     ASSERT(script < USCRIPT_CODE_LIMIT);
    288     if (generic == FontDescription::MonospaceFamily && scriptMonospaceFontMap[script])
    289         return scriptMonospaceFontMap[script];
    290     return scriptFontMap[script];
    291 }
    292 
    293 // FIXME:
    294 //  - Handle 'Inherited', 'Common' and 'Unknown'
    295 //    (see http://www.unicode.org/reports/tr24/#Usage_Model )
    296 //    For 'Inherited' and 'Common', perhaps we need to
    297 //    accept another parameter indicating the previous family
    298 //    and just return it.
    299 //  - All the characters (or characters up to the point a single
    300 //    font can cover) need to be taken into account
    301 const UChar* getFallbackFamily(UChar32 character,
    302     FontDescription::GenericFamilyType generic,
    303     UScriptCode* scriptChecked,
    304     SkFontMgr* fontManager)
    305 {
    306     ASSERT(character);
    307     UScriptCode script = getScript(character);
    308 
    309     // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
    310     // Han (determined in a locale-dependent way above). Full-width ASCII
    311     // characters are rather widely used in Japanese and Chinese documents and
    312     // they're fully covered by Chinese, Japanese and Korean fonts.
    313     if (0xFF00 < character && character < 0xFF5F)
    314         script = USCRIPT_HAN;
    315 
    316     if (script == USCRIPT_COMMON)
    317         script = getScriptBasedOnUnicodeBlock(character);
    318 
    319     const UChar* family = getFontFamilyForScript(script, generic, fontManager);
    320     // Another lame work-around to cover non-BMP characters.
    321     // If the font family for script is not found or the character is
    322     // not in BMP (> U+FFFF), we resort to the hard-coded list of
    323     // fallback fonts for now.
    324     if (!family || character > 0xFFFF) {
    325         int plane = character >> 16;
    326         switch (plane) {
    327         case 1:
    328             family = L"code2001";
    329             break;
    330         case 2:
    331             // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
    332             // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
    333             // fonts do support a small subset of ExtB (that are included in JIS X 0213),
    334             // but its coverage is rather sparse.
    335             // Eventually, this should be controlled by lang/xml:lang.
    336             if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese())
    337                 family = L"pmingliu-extb";
    338             else
    339                 family = L"simsun-extb";
    340             break;
    341         default:
    342             family = L"lucida sans unicode";
    343         }
    344     }
    345 
    346     if (scriptChecked)
    347         *scriptChecked = script;
    348     return family;
    349 }
    350 
    351 } // namespace blink
    352