Home | History | Annotate | Download | only in chromium
      1 /*
      2  * Copyright (c) 2006, 2007, 2008, Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "FontUtilsChromiumWin.h"
     33 
     34 #include <limits>
     35 
     36 #include "PlatformString.h"
     37 #include "StringHash.h"
     38 #include "UniscribeHelper.h"
     39 #include <unicode/locid.h>
     40 #include <unicode/uchar.h>
     41 #include <wtf/HashMap.h>
     42 
     43 namespace WebCore {
     44 
     45 namespace {
     46 
// A simple mapping from UScriptCode to family name.  This is a sparse array,
// which works well since the range of UScriptCode values is small.
// The array is indexed directly by the UScriptCode value and has
// USCRIPT_CODE_LIMIT slots; a slot that was never assigned a family is
// expected to hold a null pointer (the instance used in this file is a
// function-local static, hence zero-initialized).
typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT];
     50 
     51 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap)
     52 {
     53     struct FontMap {
     54         UScriptCode script;
     55         const UChar* family;
     56     };
     57 
     58     const static FontMap fontMap[] = {
     59         {USCRIPT_LATIN, L"times new roman"},
     60         {USCRIPT_GREEK, L"times new roman"},
     61         {USCRIPT_CYRILLIC, L"times new roman"},
     62         {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
     63         {USCRIPT_HIRAGANA, L"ms pgothic"},
     64         {USCRIPT_KATAKANA, L"ms pgothic"},
     65         {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
     66         {USCRIPT_HANGUL, L"gulim"},
     67         {USCRIPT_THAI, L"tahoma"},
     68         {USCRIPT_HEBREW, L"david"},
     69         {USCRIPT_ARABIC, L"tahoma"},
     70         {USCRIPT_DEVANAGARI, L"mangal"},
     71         {USCRIPT_BENGALI, L"vrinda"},
     72         {USCRIPT_GURMUKHI, L"raavi"},
     73         {USCRIPT_GUJARATI, L"shruti"},
     74         {USCRIPT_ORIYA, L"kalinga"},
     75         {USCRIPT_TAMIL, L"latha"},
     76         {USCRIPT_TELUGU, L"gautami"},
     77         {USCRIPT_KANNADA, L"tunga"},
     78         {USCRIPT_MALAYALAM, L"kartika"},
     79         {USCRIPT_LAO, L"dokchampa"},
     80         {USCRIPT_TIBETAN, L"microsoft himalaya"},
     81         {USCRIPT_GEORGIAN, L"sylfaen"},
     82         {USCRIPT_ARMENIAN, L"sylfaen"},
     83         {USCRIPT_ETHIOPIC, L"nyala"},
     84         {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
     85         {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
     86         {USCRIPT_YI, L"microsoft yi balti"},
     87         {USCRIPT_SINHALA, L"iskoola pota"},
     88         {USCRIPT_SYRIAC, L"estrangelo edessa"},
     89         {USCRIPT_KHMER, L"daunpenh"},
     90         {USCRIPT_THAANA, L"mv boli"},
     91         {USCRIPT_MONGOLIAN, L"mongolian balti"},
     92         {USCRIPT_MYANMAR, L"padauk"},
     93         // For USCRIPT_COMMON, we map blocks to scripts when
     94         // that makes sense.
     95     };
     96 
     97     for (int i = 0; i < sizeof(fontMap) / sizeof(fontMap[0]); ++i)
     98         scriptFontMap[fontMap[i].script] = fontMap[i].family;
     99 
    100     // Initialize the locale-dependent mapping.
    101     // Since Chrome synchronizes the ICU default locale with its UI locale,
    102     // this ICU locale tells the current UI locale of Chrome.
    103     icu::Locale locale = icu::Locale::getDefault();
    104     const UChar* localeFamily = 0;
    105     if (locale == icu::Locale::getJapanese())
    106         localeFamily = scriptFontMap[USCRIPT_HIRAGANA];
    107     else if (locale == icu::Locale::getKorean())
    108         localeFamily = scriptFontMap[USCRIPT_HANGUL];
    109     else {
    110         // Use Simplified Chinese font for all other locales including
    111         // Traditional Chinese because Simsun (SC font) has a wider
    112         // coverage (covering both SC and TC) than PMingLiu (TC font).
    113         // Note that |fontMap| does not have a separate entry for
    114         // USCRIPT_TRADITIONAL_HAN for that reason.
    115         // This also speeds up the TC version of Chrome when rendering SC
    116         // pages.
    117         localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN];
    118     }
    119     if (localeFamily)
    120         scriptFontMap[USCRIPT_HAN] = localeFamily;
    121 }
    122 
    123 // There are a lot of characters in USCRIPT_COMMON that can be covered
    124 // by fonts for scripts closely related to them. See
    125 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
    126 // FIXME: make this more efficient with a wider coverage
    127 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4)
    128 {
    129     UBlockCode block = ublock_getCode(ucs4);
    130     switch (block) {
    131     case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
    132         return USCRIPT_HAN;
    133     case UBLOCK_HIRAGANA:
    134     case UBLOCK_KATAKANA:
    135         return USCRIPT_HIRAGANA;
    136     case UBLOCK_ARABIC:
    137         return USCRIPT_ARABIC;
    138     case UBLOCK_THAI:
    139         return USCRIPT_THAI;
    140     case UBLOCK_GREEK:
    141         return USCRIPT_GREEK;
    142     case UBLOCK_DEVANAGARI:
    143         // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
    144         // font for now although they're used by other scripts as well.
    145         // Without a context, we can't do any better.
    146         return USCRIPT_DEVANAGARI;
    147     case UBLOCK_ARMENIAN:
    148         return USCRIPT_ARMENIAN;
    149     case UBLOCK_GEORGIAN:
    150         return USCRIPT_GEORGIAN;
    151     case UBLOCK_KANNADA:
    152         return USCRIPT_KANNADA;
    153     default:
    154         return USCRIPT_COMMON;
    155     }
    156 }
    157 
    158 UScriptCode getScript(int ucs4)
    159 {
    160     UErrorCode err = U_ZERO_ERROR;
    161     UScriptCode script = uscript_getScript(ucs4, &err);
    162     // If script is invalid, common or inherited or there's an error,
    163     // infer a script based on the unicode block of a character.
    164     if (script <= USCRIPT_INHERITED || U_FAILURE(err))
    165         script = getScriptBasedOnUnicodeBlock(ucs4);
    166     return script;
    167 }
    168 
    169 const int kUndefinedAscent = std::numeric_limits<int>::min();
    170 
    171 // Given an HFONT, return the ascent. If GetTextMetrics fails,
    172 // kUndefinedAscent is returned, instead.
    173 int getAscent(HFONT hfont)
    174 {
    175     HDC dc = GetDC(0);
    176     HGDIOBJ oldFont = SelectObject(dc, hfont);
    177     TEXTMETRIC tm;
    178     BOOL gotMetrics = GetTextMetrics(dc, &tm);
    179     SelectObject(dc, oldFont);
    180     ReleaseDC(0, dc);
    181     return gotMetrics ? tm.tmAscent : kUndefinedAscent;
    182 }
    183 
    184 struct FontData {
    185     FontData()
    186         : hfont(0)
    187         , ascent(kUndefinedAscent)
    188         , scriptCache(0)
    189     {
    190     }
    191 
    192     HFONT hfont;
    193     int ascent;
    194     mutable SCRIPT_CACHE scriptCache;
    195 };
    196 
// Cache of FontData entries keyed by a String (see getDerivedFontData() for
// how the key is built).
//
// Again, using hash_map does not earn us much here.  page_cycler_test intl2
// gave us a 'better' result with map than with hash_map even though they're
// well-within 1-sigma of each other so that the difference is not significant.
// On the other hand, some pages in intl2 seem to take longer to load with map
// in the 1st pass. Need to experiment further.
typedef HashMap<String, FontData> FontDataCache;
    203 
    204 }  // namespace
    205 
    206 // FIXME: this is font fallback code version 0.1
    207 //  - Cover all the scripts
    208 //  - Get the default font for each script/generic family from the
    209 //    preference instead of hardcoding in the source.
    210 //    (at least, read values from the registry for IE font settings).
    211 //  - Support generic families (from FontDescription)
    212 //  - If the default font for a script is not available,
    213 //    try some more fonts known to support it. Finally, we can
    214 //    use EnumFontFamilies or similar APIs to come up with a list of
    215 //    fonts supporting the script and cache the result.
    216 //  - Consider using UnicodeSet (or UnicodeMap) converted from
    217 //    GLYPHSET (BMP) or directly read from truetype cmap tables to
    218 //    keep track of which character is supported by which font
    219 //  - Update script_font_cache in response to WM_FONTCHANGE
    220 
    221 const UChar* getFontFamilyForScript(UScriptCode script,
    222                                     FontDescription::GenericFamilyType generic)
    223 {
    224     static ScriptToFontMap scriptFontMap;
    225     static bool initialized = false;
    226     if (!initialized) {
    227         initializeScriptFontMap(scriptFontMap);
    228         initialized = true;
    229     }
    230     if (script == USCRIPT_INVALID_CODE)
    231         return 0;
    232     ASSERT(script < USCRIPT_CODE_LIMIT);
    233     return scriptFontMap[script];
    234 }
    235 
    236 // FIXME:
    237 //  - Handle 'Inherited', 'Common' and 'Unknown'
    238 //    (see http://www.unicode.org/reports/tr24/#Usage_Model )
    239 //    For 'Inherited' and 'Common', perhaps we need to
    240 //    accept another parameter indicating the previous family
    241 //    and just return it.
    242 //  - All the characters (or characters up to the point a single
    243 //    font can cover) need to be taken into account
    244 const UChar* getFallbackFamily(const UChar* characters,
    245                                int length,
    246                                FontDescription::GenericFamilyType generic,
    247                                UChar32* charChecked,
    248                                UScriptCode* scriptChecked)
    249 {
    250     ASSERT(characters && characters[0] && length > 0);
    251     UScriptCode script = USCRIPT_COMMON;
    252 
    253     // Sometimes characters common to script (e.g. space) is at
    254     // the beginning of a string so that we need to skip them
    255     // to get a font required to render the string.
    256     int i = 0;
    257     UChar32 ucs4 = 0;
    258     while (i < length && script == USCRIPT_COMMON) {
    259         U16_NEXT(characters, i, length, ucs4);
    260         script = getScript(ucs4);
    261     }
    262 
    263     // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for
    264     // Han (determined in a locale-dependent way above). Full-width ASCII
    265     // characters are rather widely used in Japanese and Chinese documents and
    266     // they're fully covered by Chinese, Japanese and Korean fonts.
    267     if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
    268         script = USCRIPT_HAN;
    269 
    270     if (script == USCRIPT_COMMON)
    271         script = getScriptBasedOnUnicodeBlock(ucs4);
    272 
    273     // Another lame work-around to cover non-BMP characters.
    274     const UChar* family = getFontFamilyForScript(script, generic);
    275     if (!family) {
    276         int plane = ucs4 >> 16;
    277         switch (plane) {
    278         case 1:
    279             family = L"code2001";
    280             break;
    281         case 2:
    282             family = L"simsun-extb";
    283             break;
    284         default:
    285             family = L"lucida sans unicode";
    286         }
    287     }
    288 
    289     if (charChecked)
    290         *charChecked = ucs4;
    291     if (scriptChecked)
    292         *scriptChecked = script;
    293     return family;
    294 }
    295 
    296 // Be aware that this is not thread-safe.
    297 bool getDerivedFontData(const UChar* family,
    298                         int style,
    299                         LOGFONT* logfont,
    300                         int* ascent,
    301                         HFONT* hfont,
    302                         SCRIPT_CACHE** scriptCache)
    303 {
    304     ASSERT(logfont);
    305     ASSERT(family);
    306     ASSERT(*family);
    307 
    308     // It does not matter that we leak font data when we exit.
    309     static FontDataCache fontDataCache;
    310 
    311     // FIXME: This comes up pretty high in the profile so that
    312     // we need to measure whether using SHA256 (after coercing all the
    313     // fields to char*) is faster than String::format.
    314     String fontKey = String::format("%1d:%d:%ls", style, logfont->lfHeight, family);
    315     FontDataCache::iterator iter = fontDataCache.find(fontKey);
    316     FontData* derived;
    317     if (iter == fontDataCache.end()) {
    318         ASSERT(wcslen(family) < LF_FACESIZE);
    319         wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
    320         // FIXME: CreateFontIndirect always comes up with
    321         // a font even if there's no font matching the name. Need to
    322         // check it against what we actually want (as is done in
    323         // FontCacheWin.cpp)
    324         pair<FontDataCache::iterator, bool> entry = fontDataCache.add(fontKey, FontData());
    325         derived = &entry.first->second;
    326         derived->hfont = CreateFontIndirect(logfont);
    327         // GetAscent may return kUndefinedAscent, but we still want to
    328         // cache it so that we won't have to call CreateFontIndirect once
    329         // more for HFONT next time.
    330         derived->ascent = getAscent(derived->hfont);
    331     } else {
    332         derived = &iter->second;
    333         // Last time, GetAscent failed so that only HFONT was
    334         // cached. Try once more assuming that TryPreloadFont
    335         // was called by a caller between calls.
    336         if (kUndefinedAscent == derived->ascent)
    337             derived->ascent = getAscent(derived->hfont);
    338     }
    339     *hfont = derived->hfont;
    340     *ascent = derived->ascent;
    341     *scriptCache = &(derived->scriptCache);
    342     return *ascent != kUndefinedAscent;
    343 }
    344 
    345 int getStyleFromLogfont(const LOGFONT* logfont)
    346 {
    347     // FIXME: consider defining UNDEFINED or INVALID for style and
    348     //                  returning it when logfont is 0
    349     if (!logfont) {
    350         ASSERT_NOT_REACHED();
    351         return FontStyleNormal;
    352     }
    353     return (logfont->lfItalic ? FontStyleItalic : FontStyleNormal) |
    354            (logfont->lfUnderline ? FontStyleUnderlined : FontStyleNormal) |
    355            (logfont->lfWeight >= 700 ? FontStyleBold : FontStyleNormal);
    356 }
    357 
    358 }  // namespace WebCore
    359