1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2000 Dirk Mueller (mueller (at) kde.org) 5 * Copyright (C) 2003, 2006, 2010, 2011 Apple Inc. All rights reserved. 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Library General Public 9 * License as published by the Free Software Foundation; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Library General Public License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this library; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20 * Boston, MA 02110-1301, USA. 21 * 22 */ 23 24 #include "config.h" 25 #include "core/platform/graphics/Font.h" 26 27 #include "core/platform/graphics/FloatRect.h" 28 #include "core/platform/graphics/TextRun.h" 29 #include "core/platform/graphics/WidthIterator.h" 30 #include "core/platform/text/transcoder/FontTranscoder.h" 31 #include "wtf/MainThread.h" 32 #include "wtf/MathExtras.h" 33 #include "wtf/StdLibExtras.h" 34 #include "wtf/UnusedParam.h" 35 #include "wtf/text/StringBuilder.h" 36 37 using namespace WTF; 38 using namespace Unicode; 39 40 namespace WTF { 41 42 // allow compilation of OwnPtr<TextLayout> in source files that don't have access to the TextLayout class definition 43 template <> void deleteOwnedPtr<WebCore::TextLayout>(WebCore::TextLayout* ptr) 44 { 45 WebCore::Font::deleteLayout(ptr); 46 } 47 48 } 49 50 namespace WebCore { 51 52 const uint8_t Font::s_roundingHackCharacterTable[256] = { 53 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*\t*/, 1 /*\n*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 1 /*space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*-*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 /*?*/, 55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58 1 /*no-break space*/, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 61 }; 62 63 static const UChar32 cjkIsolatedSymbolsArray[] = { 64 // 0x2C7 Caron, Mandarin Chinese 3rd Tone 65 0x2C7, 66 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone 67 0x2CA, 68 // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone 69 0x2CB, 70 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone 71 0x2D9, 72 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051, 73 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121, 74 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE, 75 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6, 76 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC, 77 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD, 78 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12, 79 0xFE19, 0xFF1D, 80 // Emoji. 81 0x1F100 82 }; 83 84 Font::CodePath Font::s_codePath = Auto; 85 86 TypesettingFeatures Font::s_defaultTypesettingFeatures = 0; 87 88 // ============================================================================================ 89 // Font Implementation (Cross-Platform Portion) 90 // ============================================================================================ 91 92 Font::Font() 93 : m_letterSpacing(0) 94 , m_wordSpacing(0) 95 , m_isPlatformFont(false) 96 , m_needsTranscoding(false) 97 , m_typesettingFeatures(0) 98 { 99 } 100 101 Font::Font(const FontDescription& fd, float letterSpacing, float wordSpacing) 102 : m_fontDescription(fd) 103 , m_letterSpacing(letterSpacing) 104 , m_wordSpacing(wordSpacing) 105 , m_isPlatformFont(false) 106 , m_needsTranscoding(fontTranscoder().needsTranscoding(fd)) 107 , m_typesettingFeatures(computeTypesettingFeatures()) 108 { 109 } 110 111 Font::Font(const FontPlatformData& fontData, bool isPrinterFont, FontSmoothingMode fontSmoothingMode) 112 : m_fontFallbackList(FontFallbackList::create()) 113 , m_letterSpacing(0) 114 , m_wordSpacing(0) 115 , m_isPlatformFont(true) 116 , m_typesettingFeatures(computeTypesettingFeatures()) 117 { 118 m_fontDescription.setUsePrinterFont(isPrinterFont); 119 m_fontDescription.setFontSmoothing(fontSmoothingMode); 120 m_needsTranscoding = fontTranscoder().needsTranscoding(fontDescription()); 121 m_fontFallbackList->setPlatformFont(fontData); 122 } 123 124 Font::Font(const Font& other) 125 : m_fontDescription(other.m_fontDescription) 126 , m_fontFallbackList(other.m_fontFallbackList) 127 , m_letterSpacing(other.m_letterSpacing) 128 , m_wordSpacing(other.m_wordSpacing) 129 , m_isPlatformFont(other.m_isPlatformFont) 130 , m_needsTranscoding(other.m_needsTranscoding) 131 , m_typesettingFeatures(computeTypesettingFeatures()) 132 { 133 } 134 135 Font& Font::operator=(const Font& other) 136 { 137 m_fontDescription = other.m_fontDescription; 138 m_fontFallbackList = other.m_fontFallbackList; 139 m_letterSpacing = other.m_letterSpacing; 140 m_wordSpacing = other.m_wordSpacing; 141 m_isPlatformFont = other.m_isPlatformFont; 142 m_needsTranscoding = other.m_needsTranscoding; 143 m_typesettingFeatures = other.m_typesettingFeatures; 144 return *this; 145 } 146 147 bool Font::operator==(const Font& other) const 148 { 149 // Our FontData don't have to be checked, since checking the font description will be fine. 150 // FIXME: This does not work if the font was made with the FontPlatformData constructor. 151 if (loadingCustomFonts() || other.loadingCustomFonts()) 152 return false; 153 154 FontSelector* first = m_fontFallbackList ? m_fontFallbackList->fontSelector() : 0; 155 FontSelector* second = other.m_fontFallbackList ? other.m_fontFallbackList->fontSelector() : 0; 156 157 return first == second 158 && m_fontDescription == other.m_fontDescription 159 && m_letterSpacing == other.m_letterSpacing 160 && m_wordSpacing == other.m_wordSpacing 161 && (m_fontFallbackList ? m_fontFallbackList->fontSelectorVersion() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->fontSelectorVersion() : 0) 162 && (m_fontFallbackList ? m_fontFallbackList->generation() : 0) == (other.m_fontFallbackList ? other.m_fontFallbackList->generation() : 0); 163 } 164 165 void Font::update(PassRefPtr<FontSelector> fontSelector) const 166 { 167 // FIXME: It is pretty crazy that we are willing to just poke into a RefPtr, but it ends up 168 // being reasonably safe (because inherited fonts in the render tree pick up the new 169 // style anyway. Other copies are transient, e.g., the state in the GraphicsContext, and 170 // won't stick around long enough to get you in trouble). Still, this is pretty disgusting, 171 // and could eventually be rectified by using RefPtrs for Fonts themselves. 172 if (!m_fontFallbackList) 173 m_fontFallbackList = FontFallbackList::create(); 174 m_fontFallbackList->invalidate(fontSelector); 175 m_typesettingFeatures = computeTypesettingFeatures(); 176 } 177 178 void Font::drawText(GraphicsContext* context, const TextRunPaintInfo& runInfo, const FloatPoint& point, CustomFontNotReadyAction customFontNotReadyAction) const 179 { 180 // Don't draw anything while we are using custom fonts that are in the process of loading, 181 // except if the 'force' argument is set to true (in which case it will use a fallback 182 // font). 183 if (loadingCustomFonts() && customFontNotReadyAction == DoNotPaintIfFontNotReady) 184 return; 185 186 CodePath codePathToUse = codePath(runInfo.run); 187 // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050 188 if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length())) 189 codePathToUse = Complex; 190 191 if (codePathToUse != Complex) 192 return drawSimpleText(context, runInfo, point); 193 194 return drawComplexText(context, runInfo, point); 195 } 196 197 void Font::drawEmphasisMarks(GraphicsContext* context, const TextRunPaintInfo& runInfo, const AtomicString& mark, const FloatPoint& point) const 198 { 199 if (loadingCustomFonts()) 200 return; 201 202 CodePath codePathToUse = codePath(runInfo.run); 203 // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050 204 if (codePathToUse != Complex && typesettingFeatures() && (runInfo.from || runInfo.to != runInfo.run.length())) 205 codePathToUse = Complex; 206 207 if (codePathToUse != Complex) 208 drawEmphasisMarksForSimpleText(context, runInfo, mark, point); 209 else 210 drawEmphasisMarksForComplexText(context, runInfo, mark, point); 211 } 212 213 float Font::width(const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, GlyphOverflow* glyphOverflow) const 214 { 215 CodePath codePathToUse = codePath(run); 216 if (codePathToUse != Complex) { 217 // The complex path is more restrictive about returning fallback fonts than the simple path, so we need an explicit test to make their behaviors match. 218 if (!canReturnFallbackFontsForComplexText()) 219 fallbackFonts = 0; 220 // The simple path can optimize the case where glyph overflow is not observable. 221 if (codePathToUse != SimpleWithGlyphOverflow && (glyphOverflow && !glyphOverflow->computeBounds)) 222 glyphOverflow = 0; 223 } 224 225 bool hasKerningOrLigatures = typesettingFeatures() & (Kerning | Ligatures); 226 bool hasWordSpacingOrLetterSpacing = wordSpacing() || letterSpacing(); 227 float* cacheEntry = m_fontFallbackList->widthCache().add(run, std::numeric_limits<float>::quiet_NaN(), hasKerningOrLigatures, hasWordSpacingOrLetterSpacing, glyphOverflow); 228 if (cacheEntry && !std::isnan(*cacheEntry)) 229 return *cacheEntry; 230 231 float result; 232 if (codePathToUse == Complex) 233 result = floatWidthForComplexText(run, fallbackFonts, glyphOverflow); 234 else 235 result = floatWidthForSimpleText(run, fallbackFonts, glyphOverflow); 236 237 if (cacheEntry && (!fallbackFonts || fallbackFonts->isEmpty())) 238 *cacheEntry = result; 239 return result; 240 } 241 242 float Font::width(const TextRun& run, int& charsConsumed, String& glyphName) const 243 { 244 #if ENABLE(SVG_FONTS) 245 if (TextRun::RenderingContext* renderingContext = run.renderingContext()) 246 return renderingContext->floatWidthUsingSVGFont(*this, run, charsConsumed, glyphName); 247 #endif 248 249 charsConsumed = run.length(); 250 glyphName = ""; 251 return width(run); 252 } 253 254 #if !OS(DARWIN) 255 256 PassOwnPtr<TextLayout> Font::createLayout(RenderText*, float, bool) const 257 { 258 return nullptr; 259 } 260 261 void Font::deleteLayout(TextLayout*) 262 { 263 } 264 265 float Font::width(TextLayout&, unsigned, unsigned, HashSet<const SimpleFontData*>*) 266 { 267 ASSERT_NOT_REACHED(); 268 return 0; 269 } 270 271 #endif 272 273 FloatRect Font::selectionRectForText(const TextRun& run, const FloatPoint& point, int h, int from, int to) const 274 { 275 to = (to == -1 ? run.length() : to); 276 277 CodePath codePathToUse = codePath(run); 278 // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050 279 if (codePathToUse != Complex && typesettingFeatures() && (from || to != run.length())) 280 codePathToUse = Complex; 281 282 if (codePathToUse != Complex) 283 return selectionRectForSimpleText(run, point, h, from, to); 284 285 return selectionRectForComplexText(run, point, h, from, to); 286 } 287 288 int Font::offsetForPosition(const TextRun& run, float x, bool includePartialGlyphs) const 289 { 290 // FIXME: Use the fast code path once it handles partial runs with kerning and ligatures. See http://webkit.org/b/100050 291 if (codePath(run) != Complex && !typesettingFeatures()) 292 return offsetForPositionForSimpleText(run, x, includePartialGlyphs); 293 294 return offsetForPositionForComplexText(run, x, includePartialGlyphs); 295 } 296 297 template <typename CharacterType> 298 static inline String normalizeSpacesInternal(const CharacterType* characters, unsigned length) 299 { 300 StringBuilder normalized; 301 normalized.reserveCapacity(length); 302 303 for (unsigned i = 0; i < length; ++i) 304 normalized.append(Font::normalizeSpaces(characters[i])); 305 306 return normalized.toString(); 307 } 308 309 String Font::normalizeSpaces(const LChar* characters, unsigned length) 310 { 311 return normalizeSpacesInternal(characters, length); 312 } 313 314 String Font::normalizeSpaces(const UChar* characters, unsigned length) 315 { 316 return normalizeSpacesInternal(characters, length); 317 } 318 319 static bool shouldUseFontSmoothing = true; 320 321 void Font::setShouldUseSmoothing(bool shouldUseSmoothing) 322 { 323 ASSERT(isMainThread()); 324 shouldUseFontSmoothing = shouldUseSmoothing; 325 } 326 327 bool Font::shouldUseSmoothing() 328 { 329 return shouldUseFontSmoothing; 330 } 331 332 void Font::setCodePath(CodePath p) 333 { 334 s_codePath = p; 335 } 336 337 Font::CodePath Font::codePath() 338 { 339 return s_codePath; 340 } 341 342 void Font::setDefaultTypesettingFeatures(TypesettingFeatures typesettingFeatures) 343 { 344 s_defaultTypesettingFeatures = typesettingFeatures; 345 } 346 347 TypesettingFeatures Font::defaultTypesettingFeatures() 348 { 349 return s_defaultTypesettingFeatures; 350 } 351 352 Font::CodePath Font::codePath(const TextRun& run) const 353 { 354 if (s_codePath != Auto) 355 return s_codePath; 356 357 #if ENABLE(SVG_FONTS) 358 if (run.renderingContext()) 359 return Simple; 360 #endif 361 362 if (m_fontDescription.featureSettings() && m_fontDescription.featureSettings()->size() > 0) 363 return Complex; 364 365 if (run.length() > 1 && !WidthIterator::supportsTypesettingFeatures(*this)) 366 return Complex; 367 368 if (!run.characterScanForCodePath()) 369 return Simple; 370 371 if (run.is8Bit()) 372 return Simple; 373 374 // Start from 0 since drawing and highlighting also measure the characters before run->from. 375 return characterRangeCodePath(run.characters16(), run.length()); 376 } 377 378 static inline UChar keyExtractorUChar(const UChar* value) 379 { 380 return *value; 381 } 382 383 static inline UChar32 keyExtractorUChar32(const UChar32* value) 384 { 385 return *value; 386 } 387 388 Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len) 389 { 390 static UChar complexCodePathRanges[] = { 391 // U+02E5 through U+02E9 (Modifier Letters : Tone letters) 392 0x2E5, 0x2E9, 393 // U+0300 through U+036F Combining diacritical marks 394 0x300, 0x36F, 395 // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ... 396 0x0591, 0x05BD, 397 // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha 398 0x05BF, 0x05CF, 399 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic, 400 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, 401 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar 402 0x0600, 0x109F, 403 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left 404 // here if you precompose; Modern Korean will be precomposed as a result of step A) 405 0x1100, 0x11FF, 406 // U+135D through U+135F Ethiopic combining marks 407 0x135D, 0x135F, 408 // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian 409 0x1700, 0x18AF, 410 // U+1900 through U+194F Limbu (Unicode 4.0) 411 0x1900, 0x194F, 412 // U+1980 through U+19DF New Tai Lue 413 0x1980, 0x19DF, 414 // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic 415 0x1A00, 0x1CFF, 416 // U+1DC0 through U+1DFF Comining diacritical mark supplement 417 0x1DC0, 0x1DFF, 418 // U+20D0 through U+20FF Combining marks for symbols 419 0x20D0, 0x20FF, 420 // U+2CEF through U+2CF1 Combining marks for Coptic 421 0x2CEF, 0x2CF1, 422 // U+302A through U+302F Ideographic and Hangul Tone marks 423 0x302A, 0x302F, 424 // U+A67C through U+A67D Combining marks for old Cyrillic 425 0xA67C, 0xA67D, 426 // U+A6F0 through U+A6F1 Combining mark for Bamum 427 0xA6F0, 0xA6F1, 428 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended, 429 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek 430 0xA800, 0xABFF, 431 // U+D7B0 through U+D7FF Hangul Jamo Ext. B 432 0xD7B0, 0xD7FF, 433 // U+FE00 through U+FE0F Unicode variation selectors 434 0xFE00, 0xFE0F, 435 // U+FE20 through U+FE2F Combining half marks 436 0xFE20, 0xFE2F 437 }; 438 static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathRanges); 439 440 CodePath result = Simple; 441 for (unsigned i = 0; i < len; i++) { 442 const UChar c = characters[i]; 443 444 // Shortcut for common case 445 if (c < 0x2E5) 446 continue; 447 448 // U+1E00 through U+2000 characters with diacritics and stacked diacritics 449 if (c >= 0x1E00 && c <= 0x2000) { 450 result = SimpleWithGlyphOverflow; 451 continue; 452 } 453 454 // Surrogate pairs 455 if (c > 0xD7FF && c <= 0xDBFF) { 456 if (i == len - 1) 457 continue; 458 459 UChar next = characters[++i]; 460 if (!U16_IS_TRAIL(next)) 461 continue; 462 463 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next); 464 465 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Regional Indicator Symbols 466 continue; 467 if (supplementaryCharacter <= 0x1F1FF) 468 return Complex; 469 470 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Unicode variation selectors. 471 continue; 472 if (supplementaryCharacter <= 0xE01EF) 473 return Complex; 474 475 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other complex scripts 476 // in plane 1 or higher. 477 478 continue; 479 } 480 481 // Search for other Complex cases 482 UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>( 483 (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExtractorUChar); 484 // Exact matches are complex 485 if (*boundingCharacter == c) 486 return Complex; 487 bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2); 488 if (*boundingCharacter < c) { 489 // Determine if we are in a range or out 490 if (!isEndOfRange) 491 return Complex; 492 continue; 493 } 494 ASSERT(*boundingCharacter > c); 495 // Determine if we are in a range or out - opposite condition to above 496 if (isEndOfRange) 497 return Complex; 498 } 499 500 return result; 501 } 502 503 bool Font::isCJKIdeograph(UChar32 c) 504 { 505 static UChar32 cjkIdeographRanges[] = { 506 // CJK Radicals Supplement and Kangxi Radicals. 507 0x2E80, 0x2FDF, 508 // CJK Strokes. 509 0x31C0, 0x31EF, 510 // CJK Unified Ideographs Extension A. 511 0x3400, 0x4DBF, 512 // The basic CJK Unified Ideographs block. 513 0x4E00, 0x9FFF, 514 // CJK Compatibility Ideographs. 515 0xF900, 0xFAFF, 516 // CJK Unified Ideographs Extension B. 517 0x20000, 0x2A6DF, 518 // CJK Unified Ideographs Extension C. 519 // CJK Unified Ideographs Extension D. 520 0x2A700, 0x2B81F, 521 // CJK Compatibility Ideographs Supplement. 522 0x2F800, 0x2FA1F 523 }; 524 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges); 525 526 // Early out 527 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCount - 1]) 528 return false; 529 530 UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>( 531 (UChar32*)cjkIdeographRanges, cjkIdeographRangesCount, c, keyExtractorUChar32); 532 // Exact matches are CJK 533 if (*boundingCharacter == c) 534 return true; 535 bool isEndOfRange = ((boundingCharacter - cjkIdeographRanges) % 2); 536 if (*boundingCharacter < c) 537 return !isEndOfRange; 538 return isEndOfRange; 539 } 540 541 bool Font::isCJKIdeographOrSymbol(UChar32 c) 542 { 543 // Likely common case 544 if (c < 0x2C7) 545 return false; 546 547 // Hash lookup for isolated symbols (those not part of a contiguous range) 548 static HashSet<UChar32>* cjkIsolatedSymbols = 0; 549 if (!cjkIsolatedSymbols) { 550 cjkIsolatedSymbols = new HashSet<UChar32>(); 551 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i) 552 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]); 553 } 554 if (cjkIsolatedSymbols->contains(c)) 555 return true; 556 557 if (isCJKIdeograph(c)) 558 return true; 559 560 static UChar32 cjkSymbolRanges[] = { 561 0x2156, 0x215A, 562 0x2160, 0x216B, 563 0x2170, 0x217B, 564 0x23BE, 0x23CC, 565 0x2460, 0x2492, 566 0x249C, 0x24FF, 567 0x25CE, 0x25D3, 568 0x25E2, 0x25E6, 569 0x2600, 0x2603, 570 0x2660, 0x266F, 571 0x2672, 0x267D, 572 0x2776, 0x277F, 573 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030. 574 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F 575 0x2FF0, 0x302F, 576 0x3031, 0x312F, 577 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF 578 0x3190, 0x31BF, 579 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). 580 // CJK Compatibility (0x3300 .. 0x33FF). 581 0x3200, 0x33FF, 582 0xF860, 0xF862, 583 // CJK Compatibility Forms. 584 0xFE30, 0xFE4F, 585 // Halfwidth and Fullwidth Forms 586 // Usually only used in CJK 587 0xFF00, 0xFF0C, 588 0xFF0E, 0xFF1A, 589 0xFF1F, 0xFFEF, 590 // Emoji. 591 0x1F110, 0x1F129, 592 0x1F130, 0x1F149, 593 0x1F150, 0x1F169, 594 0x1F170, 0x1F189, 595 0x1F200, 0x1F6FF 596 }; 597 static size_t cjkSymbolRangesCount = WTF_ARRAY_LENGTH(cjkSymbolRanges); 598 599 UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>( 600 (UChar32*)cjkSymbolRanges, cjkSymbolRangesCount, c, keyExtractorUChar32); 601 // Exact matches are CJK Symbols 602 if (*boundingCharacter == c) 603 return true; 604 bool isEndOfRange = ((boundingCharacter - cjkSymbolRanges) % 2); 605 if (*boundingCharacter < c) 606 return !isEndOfRange; 607 return isEndOfRange; 608 } 609 610 unsigned Font::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion) 611 { 612 unsigned count = 0; 613 if (direction == LTR) { 614 for (size_t i = 0; i < length; ++i) { 615 if (treatAsSpace(characters[i])) { 616 count++; 617 isAfterExpansion = true; 618 } else 619 isAfterExpansion = false; 620 } 621 } else { 622 for (size_t i = length; i > 0; --i) { 623 if (treatAsSpace(characters[i - 1])) { 624 count++; 625 isAfterExpansion = true; 626 } else 627 isAfterExpansion = false; 628 } 629 } 630 return count; 631 } 632 633 unsigned Font::expansionOpportunityCount(const UChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion) 634 { 635 static bool expandAroundIdeographs = canExpandAroundIdeographsInComplexText(); 636 unsigned count = 0; 637 if (direction == LTR) { 638 for (size_t i = 0; i < length; ++i) { 639 UChar32 character = characters[i]; 640 if (treatAsSpace(character)) { 641 count++; 642 isAfterExpansion = true; 643 continue; 644 } 645 if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(characters[i + 1])) { 646 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]); 647 i++; 648 } 649 if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) { 650 if (!isAfterExpansion) 651 count++; 652 count++; 653 isAfterExpansion = true; 654 continue; 655 } 656 isAfterExpansion = false; 657 } 658 } else { 659 for (size_t i = length; i > 0; --i) { 660 UChar32 character = characters[i - 1]; 661 if (treatAsSpace(character)) { 662 count++; 663 isAfterExpansion = true; 664 continue; 665 } 666 if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) { 667 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character); 668 i--; 669 } 670 if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) { 671 if (!isAfterExpansion) 672 count++; 673 count++; 674 isAfterExpansion = true; 675 continue; 676 } 677 isAfterExpansion = false; 678 } 679 } 680 return count; 681 } 682 683 bool Font::canReceiveTextEmphasis(UChar32 c) 684 { 685 CharCategory category = Unicode::category(c); 686 if (category & (Separator_Space | Separator_Line | Separator_Paragraph | Other_NotAssigned | Other_Control | Other_Format)) 687 return false; 688 689 // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010. 690 if (c == ethiopicWordspace || c == aegeanWordSeparatorLine || c == aegeanWordSeparatorDot 691 || c == ugariticWordDivider || c == tibetanMarkIntersyllabicTsheg || c == tibetanMarkDelimiterTshegBstar) 692 return false; 693 694 return true; 695 } 696 697 } 698