Home | History | Annotate | Download | only in chromium
      1 /*
      2  * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "core/platform/graphics/chromium/FontUtilsChromiumWin.h"
     33 
     34 #include <limits>
     35 
     36 #include <unicode/locid.h>
     37 #include <unicode/uchar.h>
     38 #include "core/platform/graphics/chromium/UniscribeHelper.h"
     39 #include "core/platform/win/HWndDC.h"
     40 #include "wtf/HashMap.h"
     41 #include "wtf/text/StringHash.h"
     42 #include "wtf/text/WTFString.h"
     43 
     44 namespace WebCore {
     45 
     46 namespace {
     47 
     48 bool isFontPresent(const UChar* fontName)
     49 {
     50     HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     51                              fontName);
     52     if (!hfont)
     53         return false;
     54     HWndDC dc(0);
     55     HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont));
     56     WCHAR actualFontName[LF_FACESIZE];
     57     GetTextFace(dc, LF_FACESIZE, actualFontName);
     58     actualFontName[LF_FACESIZE - 1] = 0;
     59     SelectObject(dc, oldFont);
     60     DeleteObject(hfont);
     61     // We don't have to worry about East Asian fonts with locale-dependent
     62     // names here for now.
     63     return !wcscmp(fontName, actualFontName);
     64 }
     65 
     66 // A simple mapping from UScriptCode to family name.  This is a sparse array,
     67 // which works well since the range of UScriptCode values is small.
     68 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
     69 
     70 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
     71 {
     72     struct FontMap {
     73         UScriptCode script;
     74         const UChar* family;
     75     };
     76 
     77     static const FontMap fontMap[] = {
     78         {USCRIPT_LATIN, L"times new roman"},
     79         {USCRIPT_GREEK, L"times new roman"},
     80         {USCRIPT_CYRILLIC, L"times new roman"},
     81         // FIXME: Consider trying new Vista fonts before XP fonts for CJK.
     82         // Some Vista users do want to use Vista cleartype CJK fonts. If we
     83         // did, the results of tests with CJK characters would have to be
     84         // regenerated for Vista.
     85         {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
     86         {USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
     87         {USCRIPT_HIRAGANA, L"ms pgothic"},
     88         {USCRIPT_KATAKANA, L"ms pgothic"},
     89         {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
     90         {USCRIPT_HANGUL, L"gulim"},
     91         {USCRIPT_THAI, L"tahoma"},
     92         {USCRIPT_HEBREW, L"david"},
     93         {USCRIPT_ARABIC, L"tahoma"},
     94         {USCRIPT_DEVANAGARI, L"mangal"},
     95         {USCRIPT_BENGALI, L"vrinda"},
     96         {USCRIPT_GURMUKHI, L"raavi"},
     97         {USCRIPT_GUJARATI, L"shruti"},
     98         {USCRIPT_TAMIL, L"latha"},
     99         {USCRIPT_TELUGU, L"gautami"},
    100         {USCRIPT_KANNADA, L"tunga"},
    101         {USCRIPT_GEORGIAN, L"sylfaen"},
    102         {USCRIPT_ARMENIAN, L"sylfaen"},
    103         {USCRIPT_THAANA, L"mv boli"},
    104         {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
    105         {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
    106         {USCRIPT_MONGOLIAN, L"mongolian balti"},
    107         // For USCRIPT_COMMON, we map blocks to scripts when
    108         // that makes sense.
    109     };
    110 
    111     struct ScriptToFontFamilies {
    112         UScriptCode script;
    113         const UChar** families;
    114     };
    115 
    116     // Kartika on Vista or earlier lacks the support for Chillu
    117     // letters added to Unicode 5.1.
    118     // Try AnjaliOldLipi (a very widely used Malaylalam font with the full
    119     // Unicode 5.x support) before falling back to Kartika.
    120     static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam", L"Kartika", L"Rachana", 0};
    121     // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better
    122     // with Latin and looks better/larger for the same size.
    123     static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", L"Code2000", 0};
    124     // For the following 6 scripts, two or fonts are listed. The fonts in
    125     // the 1st slot are not available on Windows XP. To support these
    126     // scripts on XP, listed in the rest of slots are widely used
    127     // fonts.
    128     static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiopia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0};
    129     static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 0};
    130     static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarath OT", L"Code2000", 0};
    131     static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L"Tibetan Machine Uni", 0};
    132     static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0};
    133     static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code2000", 0};
    134     // http://www.bethmardutho.org/support/meltho/download/index.php
    135     static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisibin", L"Code2000", 0};
    136     // No Myanmar/Burmese font is shipped with Windows, yet. Try a few
    137     // widely available/used ones that supports Unicode 5.1 or later.
    138     static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L"Code2000", 0};
    139 
    140     static const ScriptToFontFamilies scriptToFontFamilies[] = {
    141         {USCRIPT_MALAYALAM, malayalamFonts},
    142         {USCRIPT_KHMER, khmerFonts},
    143         {USCRIPT_ETHIOPIC, ethiopicFonts},
    144         {USCRIPT_ORIYA, oriyaFonts},
    145         {USCRIPT_LAO, laoFonts},
    146         {USCRIPT_TIBETAN, tibetanFonts},
    147         {USCRIPT_SINHALA, sinhalaFonts},
    148         {USCRIPT_YI, yiFonts},
    149         {USCRIPT_SYRIAC, syriacFonts},
    150         {USCRIPT_MYANMAR, myanmarFonts},
    151     };
    152 
    153     for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i)
    154         scriptFontMap[fontMap[i].script] = fontMap[i].family;
    155 
    156     // FIXME: Instead of scanning the hard-coded list, we have to
    157     // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts
    158     // when it's possible (e.g. using OS/2 table). If we do that, this
    159     // had better be pulled out of here.
    160     for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) {
    161         UScriptCode script = scriptToFontFamilies[i].script;
    162         scriptFontMap[script] = 0;
    163         const UChar** familyPtr = scriptToFontFamilies[i].families;
    164         while (*familyPtr) {
    165             if (isFontPresent(*familyPtr)) {
    166                 scriptFontMap[script] = *familyPtr;
    167                 break;
    168             }
    169             ++familyPtr;
    170         }
    171     }
    172 
    173     // Initialize the locale-dependent mapping.
    174     // Since Chrome synchronizes the ICU default locale with its UI locale,
    175     // this ICU locale tells the current UI locale of Chrome.
    176     icu::Locale locale = icu::Locale::getDefault();
    177     const UChar* localeFamily = 0;
    178     if (locale == icu::Locale::getJapanese())
    179         localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
    180     else if (locale == icu::Locale::getKorean())
    181         localeFamily = scriptFontMap[USCRIPT_HANGUL];
    182     else if (locale == icu::Locale::getTraditionalChinese())
    183         localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN];
    184     else {
    185         // For other locales, use the simplified Chinese font for Han.
    186         localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
    187     }
    188     if (localeFamily)
    189         scriptFontMap[USCRIPT_HAN] = localeFamily;
    190 }
    191 
    192 // There are a lot of characters in USCRIPT_COMMON that can be covered
    193 // by fonts for scripts closely related to them. See
    194 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
    195 // FIXME: make this more efficient with a wider coverage
    196 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
    197 {
    198     UBlockCode block = ublock_getCode(ucs4);
    199     switch (block) {
    200     case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
    201         return USCRIPT_HAN;
    202     case UBLOCK_HIRAGANA:
    203     case UBLOCK_KATAKANA:
    204         return USCRIPT_HIRAGANA;
    205     case UBLOCK_ARABIC:
    206         return USCRIPT_ARABIC;
    207     case UBLOCK_THAI:
    208         return USCRIPT_THAI;
    209     case UBLOCK_GREEK:
    210         return USCRIPT_GREEK;
    211     case UBLOCK_DEVANAGARI:
    212         // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
    213         // font for now although they're used by other scripts as well.
    214         // Without a context, we can't do any better.
    215         return USCRIPT_DEVANAGARI;
    216     case UBLOCK_ARMENIAN:
    217         return USCRIPT_ARMENIAN;
    218     case UBLOCK_GEORGIAN:
    219         return USCRIPT_GEORGIAN;
    220     case UBLOCK_KANNADA:
    221         return USCRIPT_KANNADA;
    222     default:
    223         return USCRIPT_COMMON;
    224     }
    225 }
    226 
    227 UScriptCode getScript(int ucs4)
    228 {
    229     UErrorCode err = U_ZERO_ERROR;
    230     UScriptCode script = uscript_getScript(ucs4, &err);
    231     // If script is invalid, common or inherited or there's an error,
    232     // infer a script based on the unicode block of a character.
    233     if (script <= USCRIPT_INHERITED || U_FAILURE(err))
    234         script = getScriptBasedOnUnicodeBlock(ucs4);
    235     return script;
    236 }
    237 
    238 const int kUndefinedAscent = std::numeric_limits<int>::min();
    239 
    240 // Given an HFONT, return the ascent. If GetTextMetrics fails,
    241 // kUndefinedAscent is returned, instead.
    242 int getAscent(HFONT hfont)
    243 {
    244     HWndDC dc(0);
    245     HGDIOBJ oldFont = SelectObject(dc, hfont);
    246     TEXTMETRIC tm;
    247     BOOL gotMetrics = GetTextMetrics(dc, &tm);
    248     SelectObject(dc, oldFont);
    249     return gotMetrics ? tm.tmAscent : kUndefinedAscent;
    250 }
    251 
    252 const WORD kUnsupportedGlyph = 0xffff;
    253 
    254 WORD getSpaceGlyph(HFONT hfont)
    255 {
    256     HWndDC dc(0);
    257     HGDIOBJ oldFont = SelectObject(dc, hfont);
    258     WCHAR space = L' ';
    259     WORD spaceGlyph = kUnsupportedGlyph;
    260     GetGlyphIndices(dc, &space, 1, &spaceGlyph, GGI_MARK_NONEXISTING_GLYPHS);
    261     SelectObject(dc, oldFont);
    262     return spaceGlyph;
    263 }
    264 
    265 struct FontData {
    266     FontData()
    267         : hfont(0)
    268         , ascent(kUndefinedAscent)
    269         , scriptCache(0)
    270         , spaceGlyph(0)
    271     {
    272     }
    273 
    274     HFONT hfont;
    275     int ascent;
    276     mutable SCRIPT_CACHE scriptCache;
    277     WORD spaceGlyph;
    278 };
    279 
    280 // Again, using hash_map does not earn us much here.  page_cycler_test intl2
    281 // gave us a 'better' result with map than with hash_map even though they're
    282 // well-within 1-sigma of each other so that the difference is not significant.
    283 // On the other hand, some pages in intl2 seem to take longer to load with map
    284 // in the 1st pass. Need to experiment further.
    285 typedef HashMap<String, FontData> FontDataCache;
    286 
    287 } // namespace
    288 
    289 // FIXME: this is font fallback code version 0.1
    290 //  - Cover all the scripts
    291 //  - Get the default font for each script/generic family from the
    292 //    preference instead of hardcoding in the source.
    293 //    (at least, read values from the registry for IE font settings).
    294 //  - Support generic families (from FontDescription)
    295 //  - If the default font for a script is not available,
    296 //    try some more fonts known to support it. Finally, we can
    297 //    use EnumFontFamilies or similar APIs to come up with a list of
    298 //    fonts supporting the script and cache the result.
    299 //  - Consider using UnicodeSet (or UnicodeMap) converted from
    300 //    GLYPHSET (BMP) or directly read from truetype cmap tables to
    301 //    keep track of which character is supported by which font
    302 //  - Update script_font_cache in response to WM_FONTCHANGE
    303 
    304 const UChar* getFontFamilyForScript(UScriptCode script,
    305                                     FontDescription::GenericFamilyType generic)
    306 {
    307     static ScriptToFontMap scriptFontMap;
    308     static bool initialized = false;
    309     if (!initialized) {
    310         initializeScriptFontMap(scriptFontMap);
    311         initialized = true;
    312     }
    313     if (script == USCRIPT_INVALID_CODE)
    314         return 0;
    315     ASSERT(script < USCRIPT_CODE_LIMIT);
    316     return scriptFontMap[script];
    317 }
    318 
    319 // FIXME:
    320 //  - Handle 'Inherited', 'Common' and 'Unknown'
    321 //    (see http://www.unicode.org/reports/tr24/#Usage_Model )
    322 //    For 'Inherited' and 'Common', perhaps we need to
    323 //    accept another parameter indicating the previous family
    324 //    and just return it.
    325 //  - All the characters (or characters up to the point a single
    326 //    font can cover) need to be taken into account
    327 const UChar* getFallbackFamily(const UChar* characters,
    328                                int length,
    329                                FontDescription::GenericFamilyType generic,
    330                                UChar32* charChecked,
    331                                UScriptCode* scriptChecked)
    332 {
    333     ASSERT(characters && characters[0] && length > 0);
    334     UScriptCode script = USCRIPT_COMMON;
    335 
    336     // Sometimes characters common to script (e.g. space) is at
    337     // the beginning of a string so that we need to skip them
    338     // to get a font required to render the string.
    339     int i = 0;
    340     UChar32 ucs4 = 0;
    341     while (i < length && script == USCRIPT_COMMON) {
    342         U16_NEXT(characters, i, length, ucs4);
    343         script = getScript(ucs4);
    344     }
    345 
    346     // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
    347     // Han (determined in a locale-dependent way above). Full-width ASCII
    348     // characters are rather widely used in Japanese and Chinese documents and
    349     // they're fully covered by Chinese, Japanese and Korean fonts.
    350     if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
    351         script = USCRIPT_HAN;
    352 
    353     if (script == USCRIPT_COMMON)
    354         script = getScriptBasedOnUnicodeBlock(ucs4);
    355 
    356     const UChar* family = getFontFamilyForScript(script, generic);
    357     // Another lame work-around to cover non-BMP characters.
    358     // If the font family for script is not found or the character is
    359     // not in BMP (> U+FFFF), we resort to the hard-coded list of
    360     // fallback fonts for now.
    361     if (!family || ucs4 > 0xFFFF) {
    362         int plane = ucs4 >> 16;
    363         switch (plane) {
    364         case 1:
    365             family = L"code2001";
    366             break;
    367         case 2:
    368             // Use a Traditional Chinese ExtB font if in Traditional Chinese locale.
    369             // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese
    370             // fonts do support a small subset of ExtB (that are included in JIS X 0213),
    371             // but its coverage is rather sparse.
    372             // Eventually, this should be controlled by lang/xml:lang.
    373             if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese())
    374               family = L"pmingliu-extb";
    375             else
    376               family = L"simsun-extb";
    377             break;
    378         default:
    379             family = L"lucida sans unicode";
    380         }
    381     }
    382 
    383     if (charChecked)
    384         *charChecked = ucs4;
    385     if (scriptChecked)
    386         *scriptChecked = script;
    387     return family;
    388 }
    389 
    390 // Be aware that this is not thread-safe.
    391 bool getDerivedFontData(const UChar* family,
    392                         int style,
    393                         LOGFONT* logfont,
    394                         int* ascent,
    395                         HFONT* hfont,
    396                         SCRIPT_CACHE** scriptCache,
    397                         WORD* spaceGlyph)
    398 {
    399     ASSERT(logfont);
    400     ASSERT(family);
    401     ASSERT(*family);
    402 
    403     // It does not matter that we leak font data when we exit.
    404     static FontDataCache fontDataCache;
    405 
    406     // FIXME: This comes up pretty high in the profile so that
    407     // we need to measure whether using SHA256 (after coercing all the
    408     // fields to char*) is faster than String::format.
    409     String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family);
    410     FontDataCache::iterator iter = fontDataCache.find(fontKey);
    411     FontData* derived;
    412     if (iter == fontDataCache.end()) {
    413         ASSERT(wcslen(family) < LF_FACESIZE);
    414         wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
    415         // FIXME: CreateFontIndirect always comes up with
    416         // a font even if there's no font matching the name. Need to
    417         // check it against what we actually want (as is done in
    418         // FontCacheWin.cpp)
    419         FontDataCache::AddResult entry = fontDataCache.add(fontKey, FontData());
    420         derived = &entry.iterator->value;
    421         derived->hfont = CreateFontIndirect(logfont);
    422         // GetAscent may return kUndefinedAscent, but we still want to
    423         // cache it so that we won't have to call CreateFontIndirect once
    424         // more for HFONT next time.
    425         derived->ascent = getAscent(derived->hfont);
    426         derived->spaceGlyph = getSpaceGlyph(derived->hfont);
    427     } else {
    428         derived = &iter->value;
    429         // Last time, getAscent or getSpaceGlyph failed so that only HFONT was
    430         // cached. Try once more assuming that TryPreloadFont
    431         // was called by a caller between calls.
    432         if (kUndefinedAscent == derived->ascent)
    433             derived->ascent = getAscent(derived->hfont);
    434         if (kUnsupportedGlyph == derived->spaceGlyph)
    435             derived->spaceGlyph = getSpaceGlyph(derived->hfont);
    436     }
    437     *hfont = derived->hfont;
    438     *ascent = derived->ascent;
    439     *scriptCache = &(derived->scriptCache);
    440     *spaceGlyph = derived->spaceGlyph;
    441     return *ascent != kUndefinedAscent && *spaceGlyph != kUnsupportedGlyph;
    442 }
    443 
    444 int getStyleFromLogfont(const LOGFONT* logfont)
    445 {
    446     // FIXME: consider defining UNDEFINED or INVALID for style and
    447     //                  returning it when logfont is 0
    448     if (!logfont) {
    449         ASSERT_NOT_REACHED();
    450         return FontStyleNormal;
    451     }
    452     return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) |
    453            (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) |
    454            (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal);
    455 }
    456 
    457 } // namespace WebCore
    458