Home | History | Annotate | Download | only in graphics
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2000 Dirk Mueller (mueller (at) kde.org)
      5  * Copyright (C) 2003, 2006, 2010, 2011 Apple Inc. All rights reserved.
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Library General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Library General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Library General Public License
     18  * along with this library; see the file COPYING.LIB.  If not, write to
     19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  * Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #include "config.h"
     25 #include "core/platform/graphics/Font.h"
     26 
     27 #include "core/platform/graphics/FloatRect.h"
     28 #include "core/platform/graphics/TextRun.h"
     29 #include "core/platform/graphics/WidthIterator.h"
     30 #include "core/platform/text/transcoder/FontTranscoder.h"
     31 #include "wtf/MainThread.h"
     32 #include "wtf/MathExtras.h"
     33 #include "wtf/StdLibExtras.h"
     34 #include "wtf/UnusedParam.h"
     35 #include "wtf/text/StringBuilder.h"
     36 
     37 using namespace WTF;
     38 using namespace Unicode;
     39 
     40 namespace WTF {
     41 
     42 // allow compilation of OwnPtr<TextLayout> in source files that don't have access to the TextLayout class definition
     43 template <> void deleteOwnedPtr<WebCore::TextLayout>(WebCore::TextLayout* ptr)
     44 {
     45     WebCore::Font::deleteLayout(ptr);
     46 }
     47 
     48 }
     49 
     50 namespace WebCore {
     51 
     52 const uint8_t Font::s_roundingHackCharacterTable[256] = {
     53     0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*\t*/, 1 /*\n*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     54     1 /*space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*-*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*?*/,
     55     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     56     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     57     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     58     1 /*no-break space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     59     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     60     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
     61 };
     62 
     63 static const UChar32 cjkIsolatedSymbolsArray[] = {
     64     // 0x2C7 Caron, Mandarin Chinese 3rd Tone
     65     0x2C7,
     66     // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
     67     0x2CA,
     68     // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone
     69     0x2CB,
     70     // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
     71     0x2D9,
     72     0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051,
     73     0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121,
     74     0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
     75     0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6,
     76     0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC,
     77     0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
     78     0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12,
     79     0xFE19, 0xFF1D,
     80     // Emoji.
     81     0x1F100
     82 };
     83 
     84 Font::CodePath Font::s_codePath = Auto;
     85 
     86 TypesettingFeatures Font::s_defaultTypesettingFeatures = 0;
     87 
     88 // ============================================================================================
     89 // Font Implementation (Cross-Platform Portion)
     90 // ============================================================================================
     91 
     92 Font::Font()
     93     : m_letterSpacing(0)
     94     , m_wordSpacing(0)
     95     , m_isPlatformFont(false)
     96     , m_needsTranscoding(false)
     97     , m_typesettingFeatures(0)
     98 {
     99 }
    100 
    101 Font::Font(const FontDescription& fd, float letterSpacing, float wordSpacing)
    102     : m_fontDescription(fd)
    103     , m_letterSpacing(letterSpacing)
    104     , m_wordSpacing(wordSpacing)
    105     , m_isPlatformFont(false)
    106     , m_needsTranscoding(fontTranscoder().needsTranscoding(fd))
    107     , m_typesettingFeatures(computeTypesettingFeatures())
    108 {
    109 }
    110 
    111 Font::Font(const FontPlatformData& fontData, bool isPrinterFont, FontSmoothingMode fontSmoothingMode)
    112     : m_fontFallbackList(FontFallbackList::create())
    113     , m_letterSpacing(0)
    114     , m_wordSpacing(0)
    115     , m_isPlatformFont(true)
    116     , m_typesettingFeatures(computeTypesettingFeatures())
    117 {
    118     m_fontDescription.setUsePrinterFont(isPrinterFont);
    119     m_fontDescription.setFontSmoothing(fontSmoothingMode);
    120     m_needsTranscoding = fontTranscoder().needsTranscoding(fontDescription());
    121     m_fontFallbackList->setPlatformFont(fontData);
    122 }
    123 
    124 Font::Font(const Font& other)
    125     : m_fontDescription(other.m_fontDescription)
    126     , m_fontFallbackList(other.m_fontFallbackList)
    127     , m_letterSpacing(other.m_letterSpacing)
    128     , m_wordSpacing(other.m_wordSpacing)
    129     , m_isPlatformFont(other.m_isPlatformFont)
    130     , m_needsTranscoding(other.m_needsTranscoding)
    131     , m_typesettingFeatures(computeTypesettingFeatures())
    132 {
    133 }
    134 
    135 Font& Font::operator=(const Font& other)
    136 {
    137     m_fontDescription = other.m_fontDescription;
    138     m_fontFallbackList = other.m_fontFallbackList;
    139     m_letterSpacing = other.m_letterSpacing;
    140     m_wordSpacing = other.m_wordSpacing;
    141     m_isPlatformFont = other.m_isPlatformFont;
    142     m_needsTranscoding = other.m_needsTranscoding;
    143     m_typesettingFeatures = other.m_typesettingFeatures;
    144     return *this;
    145 }
    146 
    147 bool Font::operator==(const Font& other) const
    148 {
    149     // Our FontData don't have to be checked, since checking the font description will be fine.
    150     // FIXME: This does not work if the font was made with the FontPlatformData constructor.
    151     if (loadingCustomFonts() || other.loadingCustomFonts())
    152         return false;
    153 
    154     FontSelector* first = m_fontFallbackList ? m_fontFallbackList->fontSelector() : 0;
    155     FontSelector* second = other.m_fontFallbackList ? other.m_fontFallbackList->fontSelector() : 0;
    156 
    157     return first == second
    158         && m_fontDescription == other.m_fontDescription
    159         && m_letterSpacing == other.m_letterSpacing
    160         && m_wordSpacing == other.m_wordSpacing
    161         && (m_fontFallbackList ? m_fontFallbackList->fontSelectorVersion() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->fontSelectorVersion() : 0)
    162         && (m_fontFallbackList ? m_fontFallbackList->generation() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->generation() : 0);
    163 }
    164 
    165 void Font::update(PassRefPtr<FontSelector> fontSelector) const
    166 {
    167     // FIXME: It is pretty crazy that we are willing to just poke into a RefPtr, but it ends up
    168     // being reasonably safe (because inherited fonts in the render tree pick up the new
    169     // style anyway. Other copies are transient, e.g., the state in the GraphicsContext, and
    170     // won't stick around long enough to get you in trouble). Still, this is pretty disgusting,
    171     // and could eventually be rectified by using RefPtrs for Fonts themselves.
    172     if (!m_fontFallbackList)
    173         m_fontFallbackList = FontFallbackList::create();
    174     m_fontFallbackList->invalidate(fontSelector);
    175     m_typesettingFeatures = computeTypesettingFeatures();
    176 }
    177 
    178 void Font::drawText(GraphicsContext* context, const TextRunPaintInfo& runInfo, const FloatPoint& point, CustomFontNotReadyAction customFontNotReadyAction) const
    179 {
    180     // Don't draw anything while we are using custom fonts that are in the process of loading,
    181     // except if the 'force' argument is set to true (in which case it will use a fallback
    182     // font).
    183     if (loadingCustomFonts() && customFontNotReadyAction == DoNotPaintIfFontNotReady)
    184         return;
    185 
    186     CodePath codePathToUse = codePath(runInfo.run);
    187     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    188     if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length()))
    189         codePathToUse = Complex;
    190 
    191     if (codePathToUse != Complex)
    192         return drawSimpleText(context, runInfo, point);
    193 
    194     return drawComplexText(context, runInfo, point);
    195 }
    196 
    197 void Font::drawEmphasisMarks(GraphicsContext* context, const TextRunPaintInfo& runInfo, const AtomicString& mark, const FloatPoint& point) const
    198 {
    199     if (loadingCustomFonts())
    200         return;
    201 
    202     CodePath codePathToUse = codePath(runInfo.run);
    203     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    204     if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length()))
    205         codePathToUse = Complex;
    206 
    207     if (codePathToUse != Complex)
    208         drawEmphasisMarksForSimpleText(context, runInfo, mark, point);
    209     else
    210         drawEmphasisMarksForComplexText(context, runInfo, mark, point);
    211 }
    212 
    213 float Font::width(const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, GlyphOverflow* glyphOverflow) const
    214 {
    215     CodePath codePathToUse = codePath(run);
    216     if (codePathToUse != Complex) {
    217         // The complex path is more restrictive about returning fallback fonts than the simple path, so we need an explicit test to make their behaviors match.
    218         if (!canReturnFallbackFontsForComplexText())
    219             fallbackFonts = 0;
    220         // The simple path can optimize the case where glyph overflow is not observable.
    221         if (codePathToUse != SimpleWithGlyphOverflow && (glyphOverflow && !glyphOverflow->computeBounds))
    222             glyphOverflow = 0;
    223     }
    224 
    225     bool hasKerningOrLigatures = typesettingFeatures() & (Kerning | Ligatures);
    226     bool hasWordSpacingOrLetterSpacing = wordSpacing() || letterSpacing();
    227     float* cacheEntry = m_fontFallbackList->widthCache().add(run, std::numeric_limits<float>::quiet_NaN(), hasKerningOrLigatures, hasWordSpacingOrLetterSpacing, glyphOverflow);
    228     if (cacheEntry && !std::isnan(*cacheEntry))
    229         return *cacheEntry;
    230 
    231     float result;
    232     if (codePathToUse == Complex)
    233         result = floatWidthForComplexText(run, fallbackFonts, glyphOverflow);
    234     else
    235         result = floatWidthForSimpleText(run, fallbackFonts, glyphOverflow);
    236 
    237     if (cacheEntry && (!fallbackFonts || fallbackFonts->isEmpty()))
    238         *cacheEntry = result;
    239     return result;
    240 }
    241 
    242 float Font::width(const TextRun& run, int& charsConsumed, String& glyphName) const
    243 {
    244 #if ENABLE(SVG_FONTS)
    245     if (TextRun::RenderingContext* renderingContext = run.renderingContext())
    246         return renderingContext->floatWidthUsingSVGFont(*this, run, charsConsumed, glyphName);
    247 #endif
    248 
    249     charsConsumed = run.length();
    250     glyphName = "";
    251     return width(run);
    252 }
    253 
    254 #if !OS(DARWIN)
    255 
    256 PassOwnPtr<TextLayout> Font::createLayout(RenderText*, float, bool) const
    257 {
    258     return nullptr;
    259 }
    260 
    261 void Font::deleteLayout(TextLayout*)
    262 {
    263 }
    264 
    265 float Font::width(TextLayout&, unsigned, unsigned, HashSet<const SimpleFontData*>*)
    266 {
    267     ASSERT_NOT_REACHED();
    268     return 0;
    269 }
    270 
    271 #endif
    272 
    273 FloatRect Font::selectionRectForText(const TextRun& run, const FloatPoint& point, int h, int from, int to) const
    274 {
    275     to = (to == -1 ? run.length() : to);
    276 
    277     CodePath codePathToUse = codePath(run);
    278     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    279     if (codePathToUse != Complex && typesettingFeatures() && (from || to != run.length()))
    280         codePathToUse = Complex;
    281 
    282     if (codePathToUse != Complex)
    283         return selectionRectForSimpleText(run, point, h, from, to);
    284 
    285     return selectionRectForComplexText(run, point, h, from, to);
    286 }
    287 
    288 int Font::offsetForPosition(const TextRun& run, float x, bool includePartialGlyphs) const
    289 {
    290     // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050
    291     if (codePath(run) != Complex && !typesettingFeatures())
    292         return offsetForPositionForSimpleText(run, x, includePartialGlyphs);
    293 
    294     return offsetForPositionForComplexText(run, x, includePartialGlyphs);
    295 }
    296 
    297 template <typename CharacterType>
    298 static inline String normalizeSpacesInternal(const CharacterType* characters, unsigned length)
    299 {
    300     StringBuilder normalized;
    301     normalized.reserveCapacity(length);
    302 
    303     for (unsigned i = 0; i < length; ++i)
    304         normalized.append(Font::normalizeSpaces(characters[i]));
    305 
    306     return normalized.toString();
    307 }
    308 
    309 String Font::normalizeSpaces(const LChar* characters, unsigned length)
    310 {
    311     return normalizeSpacesInternal(characters, length);
    312 }
    313 
    314 String Font::normalizeSpaces(const UChar* characters, unsigned length)
    315 {
    316     return normalizeSpacesInternal(characters, length);
    317 }
    318 
    319 static bool shouldUseFontSmoothing = true;
    320 
    321 void Font::setShouldUseSmoothing(bool shouldUseSmoothing)
    322 {
    323     ASSERT(isMainThread());
    324     shouldUseFontSmoothing = shouldUseSmoothing;
    325 }
    326 
    327 bool Font::shouldUseSmoothing()
    328 {
    329     return shouldUseFontSmoothing;
    330 }
    331 
    332 void Font::setCodePath(CodePath p)
    333 {
    334     s_codePath = p;
    335 }
    336 
    337 Font::CodePath Font::codePath()
    338 {
    339     return s_codePath;
    340 }
    341 
    342 void Font::setDefaultTypesettingFeatures(TypesettingFeatures typesettingFeatures)
    343 {
    344     s_defaultTypesettingFeatures = typesettingFeatures;
    345 }
    346 
    347 TypesettingFeatures Font::defaultTypesettingFeatures()
    348 {
    349     return s_defaultTypesettingFeatures;
    350 }
    351 
    352 Font::CodePath Font::codePath(const TextRun& run) const
    353 {
    354     if (s_codePath != Auto)
    355         return s_codePath;
    356 
    357 #if ENABLE(SVG_FONTS)
    358     if (run.renderingContext())
    359         return Simple;
    360 #endif
    361 
    362     if (m_fontDescription.featureSettings() && m_fontDescription.featureSettings()->size() > 0)
    363         return Complex;
    364 
    365     if (run.length() > 1 && !WidthIterator::supportsTypesettingFeatures(*this))
    366         return Complex;
    367 
    368     if (!run.characterScanForCodePath())
    369         return Simple;
    370 
    371     if (run.is8Bit())
    372         return Simple;
    373 
    374     // Start from 0 since drawing and highlighting also measure the characters before run->from.
    375     return characterRangeCodePath(run.characters16(), run.length());
    376 }
    377 
    378 static inline UChar keyExtractorUChar(const UChar* value)
    379 {
    380     return *value;
    381 }
    382 
    383 static inline UChar32 keyExtractorUChar32(const UChar32* value)
    384 {
    385     return *value;
    386 }
    387 
    388 Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len)
    389 {
    390     static UChar complexCodePathRanges[] = {
    391         // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
    392         0x2E5, 0x2E9,
    393         // U+0300 through U+036F Combining diacritical marks
    394         0x300, 0x36F,
    395         // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...
    396         0x0591, 0x05BD,
    397         // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
    398         0x05BF, 0x05CF,
    399         // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic,
    400         // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
    401         // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
    402         0x0600, 0x109F,
    403         // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left
    404         // here if you precompose; Modern Korean will be precomposed as a result of step A)
    405         0x1100, 0x11FF,
    406         // U+135D through U+135F Ethiopic combining marks
    407         0x135D, 0x135F,
    408         // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian
    409         0x1700, 0x18AF,
    410         // U+1900 through U+194F Limbu (Unicode 4.0)
    411         0x1900, 0x194F,
    412         // U+1980 through U+19DF New Tai Lue
    413         0x1980, 0x19DF,
    414         // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic
    415         0x1A00, 0x1CFF,
    416         // U+1DC0 through U+1DFF Comining diacritical mark supplement
    417         0x1DC0, 0x1DFF,
    418         // U+20D0 through U+20FF Combining marks for symbols
    419         0x20D0, 0x20FF,
    420         // U+2CEF through U+2CF1 Combining marks for Coptic
    421         0x2CEF, 0x2CF1,
    422         // U+302A through U+302F Ideographic and Hangul Tone marks
    423         0x302A, 0x302F,
    424         // U+A67C through U+A67D Combining marks for old Cyrillic
    425         0xA67C, 0xA67D,
    426         // U+A6F0 through U+A6F1 Combining mark for Bamum
    427         0xA6F0, 0xA6F1,
    428         // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
    429         // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek
    430         0xA800, 0xABFF,
    431         // U+D7B0 through U+D7FF Hangul Jamo Ext. B
    432         0xD7B0, 0xD7FF,
    433         // U+FE00 through U+FE0F Unicode variation selectors
    434         0xFE00, 0xFE0F,
    435         // U+FE20 through U+FE2F Combining half marks
    436         0xFE20, 0xFE2F
    437     };
    438     static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathRanges);
    439 
    440     CodePath result = Simple;
    441     for (unsigned i = 0; i < len; i++) {
    442         const UChar c = characters[i];
    443 
    444         // Shortcut for common case
    445         if (c < 0x2E5)
    446             continue;
    447 
    448         // U+1E00 through U+2000 characters with diacritics and stacked diacritics
    449         if (c >= 0x1E00 && c <= 0x2000) {
    450             result = SimpleWithGlyphOverflow;
    451             continue;
    452         }
    453 
    454         // Surrogate pairs
    455         if (c > 0xD7FF && c <= 0xDBFF) {
    456             if (i == len - 1)
    457                 continue;
    458 
    459             UChar next = characters[++i];
    460             if (!U16_IS_TRAIL(next))
    461                 continue;
    462 
    463             UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);
    464 
    465             if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Regional Indicator Symbols
    466                 continue;
    467             if (supplementaryCharacter <= 0x1F1FF)
    468                 return Complex;
    469 
    470             if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Unicode variation selectors.
    471                 continue;
    472             if (supplementaryCharacter <= 0xE01EF)
    473                 return Complex;
    474 
    475             // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other complex scripts
    476             // in plane 1 or higher.
    477 
    478             continue;
    479         }
    480 
    481         // Search for other Complex cases
    482         UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>(
    483             (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExtractorUChar);
    484         // Exact matches are complex
    485         if (*boundingCharacter == c)
    486             return Complex;
    487         bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2);
    488         if (*boundingCharacter < c) {
    489             // Determine if we are in a range or out
    490             if (!isEndOfRange)
    491                 return Complex;
    492             continue;
    493         }
    494         ASSERT(*boundingCharacter > c);
    495         // Determine if we are in a range or out - opposite condition to above
    496         if (isEndOfRange)
    497             return Complex;
    498     }
    499 
    500     return result;
    501 }
    502 
    503 bool Font::isCJKIdeograph(UChar32 c)
    504 {
    505     static UChar32 cjkIdeographRanges[] = {
    506         // CJK Radicals Supplement and Kangxi Radicals.
    507         0x2E80, 0x2FDF,
    508         // CJK Strokes.
    509         0x31C0, 0x31EF,
    510         // CJK Unified Ideographs Extension A.
    511         0x3400, 0x4DBF,
    512         // The basic CJK Unified Ideographs block.
    513         0x4E00, 0x9FFF,
    514         // CJK Compatibility Ideographs.
    515         0xF900, 0xFAFF,
    516         // CJK Unified Ideographs Extension B.
    517         0x20000, 0x2A6DF,
    518         // CJK Unified Ideographs Extension C.
    519         // CJK Unified Ideographs Extension D.
    520         0x2A700, 0x2B81F,
    521         // CJK Compatibility Ideographs Supplement.
    522         0x2F800, 0x2FA1F
    523     };
    524     static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges);
    525 
    526     // Early out
    527     if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCount - 1])
    528         return false;
    529 
    530     UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>(
    531         (UChar32*)cjkIdeographRanges, cjkIdeographRangesCount, c, keyExtractorUChar32);
    532     // Exact matches are CJK
    533     if (*boundingCharacter == c)
    534         return true;
    535     bool isEndOfRange = ((boundingCharacter - cjkIdeographRanges) % 2);
    536     if (*boundingCharacter < c)
    537         return !isEndOfRange;
    538     return isEndOfRange;
    539 }
    540 
    541 bool Font::isCJKIdeographOrSymbol(UChar32 c)
    542 {
    543     // Likely common case
    544     if (c < 0x2C7)
    545         return false;
    546 
    547     // Hash lookup for isolated symbols (those not part of a contiguous range)
    548     static HashSet<UChar32>* cjkIsolatedSymbols = 0;
    549     if (!cjkIsolatedSymbols) {
    550         cjkIsolatedSymbols = new HashSet<UChar32>();
    551         for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i)
    552             cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]);
    553     }
    554     if (cjkIsolatedSymbols->contains(c))
    555         return true;
    556 
    557     if (isCJKIdeograph(c))
    558         return true;
    559 
    560     static UChar32 cjkSymbolRanges[] = {
    561         0x2156, 0x215A,
    562         0x2160, 0x216B,
    563         0x2170, 0x217B,
    564         0x23BE, 0x23CC,
    565         0x2460, 0x2492,
    566         0x249C, 0x24FF,
    567         0x25CE, 0x25D3,
    568         0x25E2, 0x25E6,
    569         0x2600, 0x2603,
    570         0x2660, 0x266F,
    571         0x2672, 0x267D,
    572         0x2776, 0x277F,
    573         // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
    574         // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
    575         0x2FF0, 0x302F,
    576         0x3031, 0x312F,
    577         // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
    578         0x3190, 0x31BF,
    579         // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
    580         // CJK Compatibility (0x3300 .. 0x33FF).
    581         0x3200, 0x33FF,
    582         0xF860, 0xF862,
    583         // CJK Compatibility Forms.
    584         0xFE30, 0xFE4F,
    585         // Halfwidth and Fullwidth Forms
    586         // Usually only used in CJK
    587         0xFF00, 0xFF0C,
    588         0xFF0E, 0xFF1A,
    589         0xFF1F, 0xFFEF,
    590         // Emoji.
    591         0x1F110, 0x1F129,
    592         0x1F130, 0x1F149,
    593         0x1F150, 0x1F169,
    594         0x1F170, 0x1F189,
    595         0x1F200, 0x1F6FF
    596     };
    597     static size_t cjkSymbolRangesCount = WTF_ARRAY_LENGTH(cjkSymbolRanges);
    598 
    599     UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>(
    600         (UChar32*)cjkSymbolRanges, cjkSymbolRangesCount, c, keyExtractorUChar32);
    601     // Exact matches are CJK Symbols
    602     if (*boundingCharacter == c)
    603         return true;
    604     bool isEndOfRange = ((boundingCharacter - cjkSymbolRanges) % 2);
    605     if (*boundingCharacter < c)
    606         return !isEndOfRange;
    607     return isEndOfRange;
    608 }
    609 
    610 unsigned Font::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion)
    611 {
    612     unsigned count = 0;
    613     if (direction == LTR) {
    614         for (size_t i = 0; i < length; ++i) {
    615             if (treatAsSpace(characters[i])) {
    616                 count++;
    617                 isAfterExpansion = true;
    618             } else
    619                 isAfterExpansion = false;
    620         }
    621     } else {
    622         for (size_t i = length; i > 0; --i) {
    623             if (treatAsSpace(characters[i - 1])) {
    624                 count++;
    625                 isAfterExpansion = true;
    626             } else
    627                 isAfterExpansion = false;
    628         }
    629     }
    630     return count;
    631 }
    632 
    633 unsigned Font::expansionOpportunityCount(const UChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion)
    634 {
    635     static bool expandAroundIdeographs = canExpandAroundIdeographsInComplexText();
    636     unsigned count = 0;
    637     if (direction == LTR) {
    638         for (size_t i = 0; i < length; ++i) {
    639             UChar32 character = characters[i];
    640             if (treatAsSpace(character)) {
    641                 count++;
    642                 isAfterExpansion = true;
    643                 continue;
    644             }
    645             if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(characters[i + 1])) {
    646                 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]);
    647                 i++;
    648             }
    649             if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
    650                 if (!isAfterExpansion)
    651                     count++;
    652                 count++;
    653                 isAfterExpansion = true;
    654                 continue;
    655             }
    656             isAfterExpansion = false;
    657         }
    658     } else {
    659         for (size_t i = length; i > 0; --i) {
    660             UChar32 character = characters[i - 1];
    661             if (treatAsSpace(character)) {
    662                 count++;
    663                 isAfterExpansion = true;
    664                 continue;
    665             }
    666             if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) {
    667                 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character);
    668                 i--;
    669             }
    670             if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) {
    671                 if (!isAfterExpansion)
    672                     count++;
    673                 count++;
    674                 isAfterExpansion = true;
    675                 continue;
    676             }
    677             isAfterExpansion = false;
    678         }
    679     }
    680     return count;
    681 }
    682 
    683 bool Font::canReceiveTextEmphasis(UChar32 c)
    684 {
    685     CharCategory category = Unicode::category(c);
    686     if (category & (Separator_Space | Separator_Line | Separator_Paragraph | Other_NotAssigned | Other_Control | Other_Format))
    687         return false;
    688 
    689     // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010.
    690     if (c == ethiopicWordspace || c == aegeanWordSeparatorLine || c == aegeanWordSeparatorDot
    691         || c == ugariticWordDivider || c == tibetanMarkIntersyllabicTsheg || c == tibetanMarkDelimiterTshegBstar)
    692         return false;
    693 
    694     return true;
    695 }
    696 
    697 }
    698