Home | History | Annotate | Download | only in fonts
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2000 Dirk Mueller (mueller (at) kde.org)
      5  * Copyright (C) 2003, 2006, 2010, 2011 Apple Inc. All rights reserved.
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Library General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Library General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Library General Public License
     18  * along with this library; see the file COPYING.LIB.  If not, write to
     19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  * Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #include "config.h"
     25 #include "platform/fonts/Font.h"
     26 
     27 #include "platform/fonts/WidthIterator.h"
     28 #include "platform/geometry/FloatRect.h"
     29 #include "platform/text/TextRun.h"
     30 #include "wtf/MainThread.h"
     31 #include "wtf/StdLibExtras.h"
     32 #include "wtf/text/StringBuilder.h"
     33 
     34 using namespace WTF;
     35 using namespace Unicode;
     36 
     37 namespace WTF {
     38 
     39 // allow compilation of OwnPtr<TextLayout> in source files that don't have access to the TextLayout class definition
     40 void OwnedPtrDeleter<WebCore::TextLayout>::deletePtr(WebCore::TextLayout* ptr)
     41 {
     42     WebCore::Font::deleteLayout(ptr);
     43 }
     44 
     45 }
     46 
     47 namespace WebCore {
     48 
     49 const uint8_t Font::s_roundingHackCharacterTable[256] = {
     50     0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*\t*/, 1 /*\n*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     51     1 /*space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*-*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*?*/,
     52     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     53     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     54     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     55     1 /*no-break space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     56     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     57     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     58 };
     59 
     60 static const UChar32 cjkIsolatedSymbolsArray[] = {
     61     // 0x2C7 Caron, Mandarin Chinese 3rd Tone
     62     0x2C7,
     63     // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
     64     0x2CA,
     65     // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone
     66     0x2CB,
     67     // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
     68     0x2D9,
     69     0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051,
     70     0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121,
     71     0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
     72     0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6,
     73     0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC,
     74     0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
     75     0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12,
     76     0xFE19, 0xFF1D,
     77     // Emoji.
     78     0x1F100
     79 };
     80 
     81 Font::CodePath Font::s_codePath = Auto;
     82 
     83 TypesettingFeatures Font::s_defaultTypesettingFeatures = 0;
     84 
     85 // ============================================================================================
     86 // Font Implementation (Cross-Platform Portion)
     87 // ============================================================================================
     88 
     89 Font::Font()
     90     : m_letterSpacing(0)
     91     , m_wordSpacing(0)
     92     , m_isPlatformFont(false)
     93     , m_typesettingFeatures(0)
     94 {
     95 }
     96 
     97 Font::Font(const FontDescription& fd, float letterSpacing, float wordSpacing)
     98     : m_fontDescription(fd)
     99     , m_letterSpacing(letterSpacing)
    100     , m_wordSpacing(wordSpacing)
    101     , m_isPlatformFont(false)
    102     , m_typesettingFeatures(computeTypesettingFeatures())
    103 {
    104 }
    105 
    106 Font::Font(const FontPlatformData& fontData, bool isPrinterFont, FontSmoothingMode fontSmoothingMode)
    107     : m_fontFallbackList(FontFallbackList::create())
    108     , m_letterSpacing(0)
    109     , m_wordSpacing(0)
    110     , m_isPlatformFont(true)
    111     , m_typesettingFeatures(computeTypesettingFeatures())
    112 {
    113     m_fontDescription.setUsePrinterFont(isPrinterFont);
    114     m_fontDescription.setFontSmoothing(fontSmoothingMode);
    115     m_fontFallbackList->setPlatformFont(fontData);
    116 }
    117 
    118 Font::Font(const Font& other)
    119     : m_fontDescription(other.m_fontDescription)
    120     , m_fontFallbackList(other.m_fontFallbackList)
    121     , m_letterSpacing(other.m_letterSpacing)
    122     , m_wordSpacing(other.m_wordSpacing)
    123     , m_isPlatformFont(other.m_isPlatformFont)
    124     , m_typesettingFeatures(computeTypesettingFeatures())
    125 {
    126 }
    127 
    128 Font& Font::operator=(const Font& other)
    129 {
    130     m_fontDescription = other.m_fontDescription;
    131     m_fontFallbackList = other.m_fontFallbackList;
    132     m_letterSpacing = other.m_letterSpacing;
    133     m_wordSpacing = other.m_wordSpacing;
    134     m_isPlatformFont = other.m_isPlatformFont;
    135     m_typesettingFeatures = other.m_typesettingFeatures;
    136     return *this;
    137 }
    138 
    139 bool Font::operator==(const Font& other) const
    140 {
    141     // Our FontData don't have to be checked, since checking the font description will be fine.
    142     // FIXME: This does not work if the font was made with the FontPlatformData constructor.
    143     if (loadingCustomFonts() || other.loadingCustomFonts())
    144         return false;
    145 
    146     FontSelector* first = m_fontFallbackList ? m_fontFallbackList->fontSelector() : 0;
    147     FontSelector* second = other.m_fontFallbackList ? other.m_fontFallbackList->fontSelector() : 0;
    148 
    149     return first == second
    150         && m_fontDescription == other.m_fontDescription
    151         && m_letterSpacing == other.m_letterSpacing
    152         && m_wordSpacing == other.m_wordSpacing
    153         && (m_fontFallbackList ? m_fontFallbackList->fontSelectorVersion() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->fontSelectorVersion() : 0)
    154         && (m_fontFallbackList ? m_fontFallbackList->generation() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->generation() : 0);
    155 }
    156 
    157 void Font::update(PassRefPtr<FontSelector> fontSelector) const
    158 {
    159     // FIXME: It is pretty crazy that we are willing to just poke into a RefPtr, but it ends up
    160     // being reasonably safe (because inherited fonts in the render tree pick up the new
    161     // style anyway. Other copies are transient, e.g., the state in the GraphicsContext, and
    162     // won't stick around long enough to get you in trouble). Still, this is pretty disgusting,
    163     // and could eventually be rectified by using RefPtrs for Fonts themselves.
    164     if (!m_fontFallbackList)
    165         m_fontFallbackList = FontFallbackList::create();
    166     m_fontFallbackList->invalidate(fontSelector);
    167     m_typesettingFeatures = computeTypesettingFeatures();
    168 }
    169 
    170 void Font::drawText(GraphicsContext* context, const TextRunPaintInfo& runInfo, const FloatPoint& point, CustomFontNotReadyAction customFontNotReadyAction) const
    171 {
    172     // Don't draw anything while we are using custom fonts that are in the process of loading,
    173     // except if the 'force' argument is set to true (in which case it will use a fallback
    174     // font).
    175     if (loadingCustomFonts() && customFontNotReadyAction == DoNotPaintIfFontNotReady)
    176         return;
    177 
    178     CodePath codePathToUse = codePath(runInfo.run);
    179     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    180     if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length()))
    181         codePathToUse = Complex;
    182 
    183     if (codePathToUse != Complex)
    184         return drawSimpleText(context, runInfo, point);
    185 
    186     return drawComplexText(context, runInfo, point);
    187 }
    188 
    189 void Font::drawEmphasisMarks(GraphicsContext* context, const TextRunPaintInfo& runInfo, const AtomicString& mark, const FloatPoint& point) const
    190 {
    191     if (loadingCustomFonts())
    192         return;
    193 
    194     CodePath codePathToUse = codePath(runInfo.run);
    195     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    196     if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length()))
    197         codePathToUse = Complex;
    198 
    199     if (codePathToUse != Complex)
    200         drawEmphasisMarksForSimpleText(context, runInfo, mark, point);
    201     else
    202         drawEmphasisMarksForComplexText(context, runInfo, mark, point);
    203 }
    204 
    205 float Font::width(const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, GlyphOverflow* glyphOverflow) const
    206 {
    207     CodePath codePathToUse = codePath(run);
    208     if (codePathToUse != Complex) {
    209         // The complex path is more restrictive about returning fallback fonts than the simple path, so we need an explicit test to make their behaviors match.
    210         if (!canReturnFallbackFontsForComplexText())
    211             fallbackFonts = 0;
    212         // The simple path can optimize the case where glyph overflow is not observable.
    213         if (codePathToUse != SimpleWithGlyphOverflow && (glyphOverflow && !glyphOverflow->computeBounds))
    214             glyphOverflow = 0;
    215     }
    216 
    217     bool hasKerningOrLigatures = typesettingFeatures() & (Kerning | Ligatures);
    218     bool hasWordSpacingOrLetterSpacing = wordSpacing() || letterSpacing();
    219     float* cacheEntry = m_fontFallbackList->widthCache().add(run, std::numeric_limits<float>::quiet_NaN(), hasKerningOrLigatures, hasWordSpacingOrLetterSpacing, glyphOverflow);
    220     if (cacheEntry && !std::isnan(*cacheEntry))
    221         return *cacheEntry;
    222 
    223     float result;
    224     if (codePathToUse == Complex)
    225         result = floatWidthForComplexText(run, fallbackFonts, glyphOverflow);
    226     else
    227         result = floatWidthForSimpleText(run, fallbackFonts, glyphOverflow);
    228 
    229     if (cacheEntry && (!fallbackFonts || fallbackFonts->isEmpty()))
    230         *cacheEntry = result;
    231     return result;
    232 }
    233 
    234 float Font::width(const TextRun& run, int& charsConsumed, String& glyphName) const
    235 {
    236 #if ENABLE(SVG_FONTS)
    237     if (TextRun::RenderingContext* renderingContext = run.renderingContext())
    238         return renderingContext->floatWidthUsingSVGFont(*this, run, charsConsumed, glyphName);
    239 #endif
    240 
    241     charsConsumed = run.length();
    242     glyphName = "";
    243     return width(run);
    244 }
    245 
    246 #if !OS(MACOSX)
    247 
    248 PassOwnPtr<TextLayout> Font::createLayoutForMacComplexText(const TextRun&, unsigned, float, bool) const
    249 {
    250     ASSERT_NOT_REACHED();
    251     return nullptr;
    252 }
    253 
    254 void Font::deleteLayout(TextLayout*)
    255 {
    256 }
    257 
    258 float Font::width(TextLayout&, unsigned, unsigned, HashSet<const SimpleFontData*>*)
    259 {
    260     ASSERT_NOT_REACHED();
    261     return 0;
    262 }
    263 
    264 #endif
    265 
    266 FloatRect Font::selectionRectForText(const TextRun& run, const FloatPoint& point, int h, int from, int to) const
    267 {
    268     to = (to == -1 ? run.length() : to);
    269 
    270     CodePath codePathToUse = codePath(run);
    271     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    272     if (codePathToUse != Complex && typesettingFeatures() && (from || to != run.length()))
    273         codePathToUse = Complex;
    274 
    275     if (codePathToUse != Complex)
    276         return selectionRectForSimpleText(run, point, h, from, to);
    277 
    278     return selectionRectForComplexText(run, point, h, from, to);
    279 }
    280 
    281 int Font::offsetForPosition(const TextRun& run, float x, bool includePartialGlyphs) const
    282 {
    283     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    284     if (codePath(run) != Complex && !typesettingFeatures())
    285         return offsetForPositionForSimpleText(run, x, includePartialGlyphs);
    286 
    287     return offsetForPositionForComplexText(run, x, includePartialGlyphs);
    288 }
    289 
    290 template <typename CharacterType>
    291 static inline String normalizeSpacesInternal(const CharacterType* characters, unsigned length)
    292 {
    293     StringBuilder normalized;
    294     normalized.reserveCapacity(length);
    295 
    296     for (unsigned i = 0; i < length; ++i)
    297         normalized.append(Font::normalizeSpaces(characters[i]));
    298 
    299     return normalized.toString();
    300 }
    301 
    302 String Font::normalizeSpaces(const LChar* characters, unsigned length)
    303 {
    304     return normalizeSpacesInternal(characters, length);
    305 }
    306 
    307 String Font::normalizeSpaces(const UChar* characters, unsigned length)
    308 {
    309     return normalizeSpacesInternal(characters, length);
    310 }
    311 
    312 static bool shouldUseFontSmoothing = true;
    313 
    314 void Font::setShouldUseSmoothing(bool shouldUseSmoothing)
    315 {
    316     ASSERT(isMainThread());
    317     shouldUseFontSmoothing = shouldUseSmoothing;
    318 }
    319 
    320 bool Font::shouldUseSmoothing()
    321 {
    322     return shouldUseFontSmoothing;
    323 }
    324 
    325 void Font::setCodePath(CodePath p)
    326 {
    327     s_codePath = p;
    328 }
    329 
    330 Font::CodePath Font::codePath()
    331 {
    332     return s_codePath;
    333 }
    334 
    335 void Font::setDefaultTypesettingFeatures(TypesettingFeatures typesettingFeatures)
    336 {
    337     s_defaultTypesettingFeatures = typesettingFeatures;
    338 }
    339 
    340 TypesettingFeatures Font::defaultTypesettingFeatures()
    341 {
    342     return s_defaultTypesettingFeatures;
    343 }
    344 
    345 Font::CodePath Font::codePath(const TextRun& run) const
    346 {
    347     if (s_codePath != Auto)
    348         return s_codePath;
    349 
    350 #if ENABLE(SVG_FONTS)
    351     if (run.renderingContext())
    352         return Simple;
    353 #endif
    354 
    355     if (m_fontDescription.featureSettings() && m_fontDescription.featureSettings()->size() > 0)
    356         return Complex;
    357 
    358     if (run.length() > 1 && !WidthIterator::supportsTypesettingFeatures(*this))
    359         return Complex;
    360 
    361     if (!run.characterScanForCodePath())
    362         return Simple;
    363 
    364     if (run.is8Bit())
    365         return Simple;
    366 
    367     // Start from 0 since drawing and highlighting also measure the characters before run->from.
    368     return characterRangeCodePath(run.characters16(), run.length());
    369 }
    370 
    371 static inline UChar keyExtractorUChar(const UChar* value)
    372 {
    373     return *value;
    374 }
    375 
    376 static inline UChar32 keyExtractorUChar32(const UChar32* value)
    377 {
    378     return *value;
    379 }
    380 
    381 Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len)
    382 {
    383     static const UChar complexCodePathRanges[] = {
    384         // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
    385         0x2E5, 0x2E9,
    386         // U+0300 through U+036F Combining diacritical marks
    387         0x300, 0x36F,
    388         // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...
    389         0x0591, 0x05BD,
    390         // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
    391         0x05BF, 0x05CF,
    392         // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic,
    393         // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
    394         // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
    395         0x0600, 0x109F,
    396         // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left
    397         // here if you precompose; Modern Korean will be precomposed as a result of step A)
    398         0x1100, 0x11FF,
    399         // U+135D through U+135F Ethiopic combining marks
    400         0x135D, 0x135F,
    401         // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian
    402         0x1700, 0x18AF,
    403         // U+1900 through U+194F Limbu (Unicode 4.0)
    404         0x1900, 0x194F,
    405         // U+1980 through U+19DF New Tai Lue
    406         0x1980, 0x19DF,
    407         // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic
    408         0x1A00, 0x1CFF,
    409         // U+1DC0 through U+1DFF Comining diacritical mark supplement
    410         0x1DC0, 0x1DFF,
    411         // U+20D0 through U+20FF Combining marks for symbols
    412         0x20D0, 0x20FF,
    413         // U+2CEF through U+2CF1 Combining marks for Coptic
    414         0x2CEF, 0x2CF1,
    415         // U+302A through U+302F Ideographic and Hangul Tone marks
    416         0x302A, 0x302F,
    417         // U+A67C through U+A67D Combining marks for old Cyrillic
    418         0xA67C, 0xA67D,
    419         // U+A6F0 through U+A6F1 Combining mark for Bamum
    420         0xA6F0, 0xA6F1,
    421         // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
    422         // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek
    423         0xA800, 0xABFF,
    424         // U+D7B0 through U+D7FF Hangul Jamo Ext. B
    425         0xD7B0, 0xD7FF,
    426         // U+FE00 through U+FE0F Unicode variation selectors
    427         0xFE00, 0xFE0F,
    428         // U+FE20 through U+FE2F Combining half marks
    429         0xFE20, 0xFE2F
    430     };
    431     static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathRanges);
    432 
    433     CodePath result = Simple;
    434     for (unsigned i = 0; i < len; i++) {
    435         const UChar c = characters[i];
    436 
    437         // Shortcut for common case
    438         if (c < 0x2E5)
    439             continue;
    440 
    441         // U+1E00 through U+2000 characters with diacritics and stacked diacritics
    442         if (c >= 0x1E00 && c <= 0x2000) {
    443             result = SimpleWithGlyphOverflow;
    444             continue;
    445         }
    446 
    447         // Surrogate pairs
    448         if (c > 0xD7FF && c <= 0xDBFF) {
    449             if (i == len - 1)
    450                 continue;
    451 
    452             UChar next = characters[++i];
    453             if (!U16_IS_TRAIL(next))
    454                 continue;
    455 
    456             UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);
    457 
    458             if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Regional Indicator Symbols
    459                 continue;
    460             if (supplementaryCharacter <= 0x1F1FF)
    461                 return Complex;
    462 
    463             if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Unicode variation selectors.
    464                 continue;
    465             if (supplementaryCharacter <= 0xE01EF)
    466                 return Complex;
    467 
    468             // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other complex scripts
    469             // in plane 1 or higher.
    470 
    471             continue;
    472         }
    473 
    474         // Search for other Complex cases
    475         UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>(
    476             (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExtractorUChar);
    477         // Exact matches are complex
    478         if (*boundingCharacter == c)
    479             return Complex;
    480         bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2);
    481         if (*boundingCharacter < c) {
    482             // Determine if we are in a range or out
    483             if (!isEndOfRange)
    484                 return Complex;
    485             continue;
    486         }
    487         ASSERT(*boundingCharacter > c);
    488         // Determine if we are in a range or out - opposite condition to above
    489         if (isEndOfRange)
    490             return Complex;
    491     }
    492 
    493     return result;
    494 }
    495 
    496 bool Font::isCJKIdeograph(UChar32 c)
    497 {
    498     static const UChar32 cjkIdeographRanges[] = {
    499         // CJK Radicals Supplement and Kangxi Radicals.
    500         0x2E80, 0x2FDF,
    501         // CJK Strokes.
    502         0x31C0, 0x31EF,
    503         // CJK Unified Ideographs Extension A.
    504         0x3400, 0x4DBF,
    505         // The basic CJK Unified Ideographs block.
    506         0x4E00, 0x9FFF,
    507         // CJK Compatibility Ideographs.
    508         0xF900, 0xFAFF,
    509         // CJK Unified Ideographs Extension B.
    510         0x20000, 0x2A6DF,
    511         // CJK Unified Ideographs Extension C.
    512         // CJK Unified Ideographs Extension D.
    513         0x2A700, 0x2B81F,
    514         // CJK Compatibility Ideographs Supplement.
    515         0x2F800, 0x2FA1F
    516     };
    517     static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges);
    518 
    519     // Early out
    520     if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCount - 1])
    521         return false;
    522 
    523     UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>(
    524         (UChar32*)cjkIdeographRanges, cjkIdeographRangesCount, c, keyExtractorUChar32);
    525     // Exact matches are CJK
    526     if (*boundingCharacter == c)
    527         return true;
    528     bool isEndOfRange = ((boundingCharacter - cjkIdeographRanges) % 2);
    529     if (*boundingCharacter < c)
    530         return !isEndOfRange;
    531     return isEndOfRange;
    532 }
    533 
    534 bool Font::isCJKIdeographOrSymbol(UChar32 c)
    535 {
    536     // Likely common case
    537     if (c < 0x2C7)
    538         return false;
    539 
    540     // Hash lookup for isolated symbols (those not part of a contiguous range)
    541     static HashSet<UChar32>* cjkIsolatedSymbols = 0;
    542     if (!cjkIsolatedSymbols) {
    543         cjkIsolatedSymbols = new HashSet<UChar32>();
    544         for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i)
    545             cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]);
    546     }
    547     if (cjkIsolatedSymbols->contains(c))
    548         return true;
    549 
    550     if (isCJKIdeograph(c))
    551         return true;
    552 
    553     static const UChar32 cjkSymbolRanges[] = {
    554         0x2156, 0x215A,
    555         0x2160, 0x216B,
    556         0x2170, 0x217B,
    557         0x23BE, 0x23CC,
    558         0x2460, 0x2492,
    559         0x249C, 0x24FF,
    560         0x25CE, 0x25D3,
    561         0x25E2, 0x25E6,
    562         0x2600, 0x2603,
    563         0x2660, 0x266F,
    564         0x2672, 0x267D,
    565         0x2776, 0x277F,
    566         // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
    567         // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
    568         0x2FF0, 0x302F,
    569         0x3031, 0x312F,
    570         // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
    571         0x3190, 0x31BF,
    572         // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
    573         // CJK Compatibility (0x3300 .. 0x33FF).
    574         0x3200, 0x33FF,
    575         0xF860, 0xF862,
    576         // CJK Compatibility Forms.
    577         0xFE30, 0xFE4F,
    578         // Halfwidth and Fullwidth Forms
    579         // Usually only used in CJK
    580         0xFF00, 0xFF0C,
    581         0xFF0E, 0xFF1A,
    582         0xFF1F, 0xFFEF,
    583         // Emoji.
    584         0x1F110, 0x1F129,
    585         0x1F130, 0x1F149,
    586         0x1F150, 0x1F169,
    587         0x1F170, 0x1F189,
    588         0x1F200, 0x1F6FF
    589     };
    590     static size_t cjkSymbolRangesCount = WTF_ARRAY_LENGTH(cjkSymbolRanges);
    591 
    592     UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>(
    593         (UChar32*)cjkSymbolRanges, cjkSymbolRangesCount, c, keyExtractorUChar32);
    594     // Exact matches are CJK Symbols
    595     if (*boundingCharacter == c)
    596         return true;
    597     bool isEndOfRange = ((boundingCharacter - cjkSymbolRanges) % 2);
    598     if (*boundingCharacter < c)
    599         return !isEndOfRange;
    600     return isEndOfRange;
    601 }
    602 
    603 unsigned Font::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion)
    604 {
    605     unsigned count = 0;
    606     if (direction == LTR) {
    607         for (size_t i = 0; i < length; ++i) {
    608             if (treatAsSpace(characters[i])) {
    609                 count++;
    610                 isAfterExpansion = true;
    611             } else
    612                 isAfterExpansion = false;
    613         }
    614     } else {
    615         for (size_t i = length; i > 0; --i) {
    616             if (treatAsSpace(characters[i - 1])) {
    617                 count++;
    618                 isAfterExpansion = true;
    619             } else
    620                 isAfterExpansion = false;
    621         }
    622     }
    623     return count;
    624 }
    625 
    626 unsigned Font::expansionOpportunityCount(const UChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion)
    627 {
    628     static bool expandAroundIdeographs = canExpandAroundIdeographsInComplexText();
    629     unsigned count = 0;
    630     if (direction == LTR) {
    631         for (size_t i = 0; i < length; ++i) {
    632             UChar32 character = characters[i];
    633             if (treatAsSpace(character)) {
    634                 count++;
    635                 isAfterExpansion = true;
    636                 continue;
    637             }
    638             if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(characters[i + 1])) {
    639                 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]);
    640                 i++;
    641             }
    642             if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
    643                 if (!isAfterExpansion)
    644                     count++;
    645                 count++;
    646                 isAfterExpansion = true;
    647                 continue;
    648             }
    649             isAfterExpansion = false;
    650         }
    651     } else {
    652         for (size_t i = length; i > 0; --i) {
    653             UChar32 character = characters[i - 1];
    654             if (treatAsSpace(character)) {
    655                 count++;
    656                 isAfterExpansion = true;
    657                 continue;
    658             }
    659             if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) {
    660                 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character);
    661                 i--;
    662             }
    663             if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
    664                 if (!isAfterExpansion)
    665                     count++;
    666                 count++;
    667                 isAfterExpansion = true;
    668                 continue;
    669             }
    670             isAfterExpansion = false;
    671         }
    672     }
    673     return count;
    674 }
    675 
    676 bool Font::canReceiveTextEmphasis(UChar32 c)
    677 {
    678     CharCategory category = Unicode::category(c);
    679     if (category & (Separator_Space | Separator_Line | Separator_Paragraph | Other_NotAssigned | Other_Control | Other_Format))
    680         return false;
    681 
    682     // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010.
    683     if (c == ethiopicWordspace || c == aegeanWordSeparatorLine || c == aegeanWordSeparatorDot
    684         || c == ugariticWordDivider || c == tibetanMarkIntersyllabicTsheg || c == tibetanMarkDelimiterTshegBstar)
    685         return false;
    686 
    687     return true;
    688 }
    689 
    690 void Font::willUseFontData() const
    691 {
    692     const FontFamily& family = fontDescription().family();
    693     if (m_fontFallbackList && m_fontFallbackList->fontSelector() && !family.familyIsEmpty())
    694         m_fontFallbackList->fontSelector()->willUseFontData(fontDescription(), family.family());
    695 }
    696 
    697 }
    698