1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // #define VERBOSE_DEBUG 18 19 #define LOG_TAG "Minikin" 20 21 #include <algorithm> 22 23 #include <log/log.h> 24 #include "unicode/unistr.h" 25 #include "unicode/unorm2.h" 26 27 #include "FontLanguage.h" 28 #include "FontLanguageListCache.h" 29 #include "MinikinInternal.h" 30 #include <minikin/Emoji.h> 31 #include <minikin/FontCollection.h> 32 33 using std::vector; 34 35 namespace minikin { 36 37 template <typename T> 38 static inline T max(T a, T b) { 39 return a>b ? a : b; 40 } 41 42 const uint32_t EMOJI_STYLE_VS = 0xFE0F; 43 const uint32_t TEXT_STYLE_VS = 0xFE0E; 44 45 uint32_t FontCollection::sNextId = 0; 46 47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) { 48 std::vector<std::shared_ptr<FontFamily>> typefaces; 49 typefaces.push_back(typeface); 50 init(typefaces); 51 } 52 53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : 54 mMaxChar(0) { 55 init(typefaces); 56 } 57 58 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) { 59 android::AutoMutex _l(gMinikinLock); 60 mId = sNextId++; 61 vector<uint32_t> lastChar; 62 size_t nTypefaces = typefaces.size(); 63 #ifdef VERBOSE_DEBUG 64 ALOGD("nTypefaces = %zd\n", nTypefaces); 65 #endif 66 const FontStyle defaultStyle; 67 for (size_t i = 0; i < nTypefaces; i++) { 68 const std::shared_ptr<FontFamily>& family = typefaces[i]; 69 if (family->getClosestMatch(defaultStyle).font == nullptr) { 70 continue; 71 } 72 const SparseBitSet& coverage = family->getCoverage(); 73 mFamilies.push_back(family); // emplace_back would be better 74 if (family->hasVSTable()) { 75 mVSFamilyVec.push_back(family); 76 } 77 mMaxChar = max(mMaxChar, coverage.length()); 78 lastChar.push_back(coverage.nextSetBit(0)); 79 80 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes(); 81 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); 82 } 83 nTypefaces = mFamilies.size(); 84 LOG_ALWAYS_FATAL_IF(nTypefaces == 0, 85 "Font collection must have at least one valid typeface"); 86 LOG_ALWAYS_FATAL_IF(nTypefaces > 254, 87 "Font collection may only have up to 254 font families."); 88 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; 89 // TODO: Use variation selector map for mRanges construction. 90 // A font can have a glyph for a base code point and variation selector pair but no glyph for 91 // the base code point without variation selector. The family won't be listed in the range in 92 // this case. 93 for (size_t i = 0; i < nPages; i++) { 94 Range dummy; 95 mRanges.push_back(dummy); 96 Range* range = &mRanges.back(); 97 #ifdef VERBOSE_DEBUG 98 ALOGD("i=%zd: range start = %zd\n", i, offset); 99 #endif 100 range->start = mFamilyVec.size(); 101 for (size_t j = 0; j < nTypefaces; j++) { 102 if (lastChar[j] < (i + 1) << kLogCharsPerPage) { 103 const std::shared_ptr<FontFamily>& family = mFamilies[j]; 104 mFamilyVec.push_back(static_cast<uint8_t>(j)); 105 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage); 106 #ifdef VERBOSE_DEBUG 107 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j); 108 #endif 109 lastChar[j] = nextChar; 110 } 111 } 112 range->end = mFamilyVec.size(); 113 } 114 // See the comment in Range for more details. 115 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF, 116 "Exceeded the maximum indexable cmap coverage."); 117 } 118 119 // Special scores for the font fallback. 120 const uint32_t kUnsupportedFontScore = 0; 121 const uint32_t kFirstFontScore = UINT32_MAX; 122 123 // Calculates a font score. 124 // The score of the font family is based on three subscores. 125 // - Coverage Score: How well the font family covers the given character or variation sequence. 126 // - Language Score: How well the font family is appropriate for the language. 127 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the 128 // one in BCP47. This is our own font variant (e.g., elegant, compact). 129 // 130 // Then, there is a priority for these three subscores as follow: 131 // Coverage Score > Language Score > Variant Score 132 // The returned score reflects this priority order. 133 // 134 // Note that there are two special scores. 135 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its 136 // base character. 137 // - kFirstFontScore: When the font is the first font family in the collection and it supports the 138 // given character or variation sequence. 139 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId, 140 const std::shared_ptr<FontFamily>& fontFamily) const { 141 142 const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily); 143 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) { 144 // No need to calculate other scores. 145 return coverageScore; 146 } 147 148 const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily); 149 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); 150 151 // Subscores are encoded into 31 bits representation to meet the subscore priority. 152 // The highest 2 bits are for coverage score, then following 28 bits are for language score, 153 // then the last 1 bit is for variant score. 154 return coverageScore << 29 | languageScore << 1 | variantScore; 155 } 156 157 // Calculates a font score based on variation sequence coverage. 158 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base 159 // character. 160 // - Returns kFirstFontScore if the font family is the first font family in the collection and it 161 // supports the given character or variation sequence. 162 // - Returns 3 if the font family supports the variation sequence. 163 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font. 164 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font. 165 // - Returns 1 if the variation selector is not specified or if the font family only supports the 166 // variation sequence's base character. 167 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, 168 const std::shared_ptr<FontFamily>& fontFamily) const { 169 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); 170 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) { 171 // The font doesn't support either variation sequence or even the base character. 172 return kUnsupportedFontScore; 173 } 174 175 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { 176 // If the first font family supports the given character or variation sequence, always use 177 // it. 178 return kFirstFontScore; 179 } 180 181 if (vs == 0) { 182 return 1; 183 } 184 185 if (hasVSGlyph) { 186 return 3; 187 } 188 189 if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) { 190 const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId()); 191 bool hasEmojiFlag = false; 192 for (size_t i = 0; i < langs.size(); ++i) { 193 if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) { 194 hasEmojiFlag = true; 195 break; 196 } 197 } 198 199 if (vs == EMOJI_STYLE_VS) { 200 return hasEmojiFlag ? 2 : 1; 201 } else { // vs == TEXT_STYLE_VS 202 return hasEmojiFlag ? 1 : 2; 203 } 204 } 205 return 1; 206 } 207 208 // Calculate font scores based on the script matching, subtag matching and primary langauge matching. 209 // 210 // 1. If only the font's language matches or there is no matches between requested font and 211 // supported font, then the font obtains a score of 0. 212 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script, 213 // a match in subtag gets a score of 2 and a match in scripts gains a score of 1. 214 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while 215 // language-and-script obtains a socre of 3 with the same reason above. 216 // 217 // If two languages in the requested list have the same language score, the font matching with 218 // higher priority language gets a higher score. For example, in the case the user requested 219 // language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score 220 // than the font of "en-Latn". 221 // 222 // To achieve score calculation with priorities, the language score is determined as follows: 223 // LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1) 224 // Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's 225 // matching score. The possible values of s(i) are 0, 1, 2, 3 and 4. 226 uint32_t FontCollection::calcLanguageMatchingScore( 227 uint32_t userLangListId, const FontFamily& fontFamily) { 228 const FontLanguages& langList = FontLanguageListCache::getById(userLangListId); 229 const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId()); 230 231 const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT); 232 uint32_t score = 0; 233 for (size_t i = 0; i < maxCompareNum; ++i) { 234 score = score * 5u + langList[i].calcScoreFor(fontLanguages); 235 } 236 return score; 237 } 238 239 // Calculates a font score based on variant ("compact" or "elegant") matching. 240 // - Returns 1 if the font doesn't have variant or the variant matches with the text style. 241 // - No score if the font has a variant but it doesn't match with the text style. 242 uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) { 243 return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0; 244 } 245 246 // Implement heuristic for choosing best-match font. Here are the rules: 247 // 1. If first font in the collection has the character, it wins. 248 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail. 249 // 3. Highest score wins, with ties resolved to the first font. 250 // This method never returns nullptr. 251 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs, 252 uint32_t langListId, int variant) const { 253 if (ch >= mMaxChar) { 254 return mFamilies[0]; 255 } 256 257 Range range = mRanges[ch >> kLogCharsPerPage]; 258 259 if (vs != 0) { 260 range = { 0, static_cast<uint16_t>(mFamilies.size()) }; 261 } 262 263 #ifdef VERBOSE_DEBUG 264 ALOGD("querying range %zd:%zd\n", range.start, range.end); 265 #endif 266 int bestFamilyIndex = -1; 267 uint32_t bestScore = kUnsupportedFontScore; 268 for (size_t i = range.start; i < range.end; i++) { 269 const std::shared_ptr<FontFamily>& family = 270 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i]; 271 const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family); 272 if (score == kFirstFontScore) { 273 // If the first font family supports the given character or variation sequence, always 274 // use it. 275 return family; 276 } 277 if (score > bestScore) { 278 bestScore = score; 279 bestFamilyIndex = i; 280 } 281 } 282 if (bestFamilyIndex == -1) { 283 UErrorCode errorCode = U_ZERO_ERROR; 284 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); 285 if (U_SUCCESS(errorCode)) { 286 UChar decomposed[4]; 287 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); 288 if (U_SUCCESS(errorCode) && len > 0) { 289 int off = 0; 290 U16_NEXT_UNSAFE(decomposed, off, ch); 291 return getFamilyForChar(ch, vs, langListId, variant); 292 } 293 } 294 return mFamilies[0]; 295 } 296 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex]; 297 } 298 299 // Characters where we want to continue using existing font run for (or stick to the next run if 300 // they start a string), even if the font does not support them explicitly. These are handled 301 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's 302 // usually meaningless to switch to a different font to display them. 303 static bool doesNotNeedFontSupport(uint32_t c) { 304 return c == 0x00AD // SOFT HYPHEN 305 || c == 0x034F // COMBINING GRAPHEME JOINER 306 || c == 0x061C // ARABIC LETTER MARK 307 || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK 308 || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 309 || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE 310 || c == 0xFEFF // BYTE ORDER MARK 311 || isVariationSelector(c); 312 } 313 314 // Characters where we want to continue using existing font run instead of 315 // recomputing the best match in the fallback list. 316 static const uint32_t stickyWhitelist[] = { 317 '!', 318 ',', 319 '-', 320 '.', 321 ':', 322 ';', 323 '?', 324 0x00A0, // NBSP 325 0x2010, // HYPHEN 326 0x2011, // NB_HYPHEN 327 0x202F, // NNBSP 328 0x2640, // FEMALE_SIGN, 329 0x2642, // MALE_SIGN, 330 0x2695, // STAFF_OF_AESCULAPIUS 331 }; 332 333 static bool isStickyWhitelisted(uint32_t c) { 334 for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { 335 if (stickyWhitelist[i] == c) return true; 336 } 337 return false; 338 } 339 340 static inline bool isCombining(uint32_t c) { 341 return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0; 342 } 343 344 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint, 345 uint32_t variationSelector) const { 346 if (!isVariationSelector(variationSelector)) { 347 return false; 348 } 349 if (baseCodepoint >= mMaxChar) { 350 return false; 351 } 352 353 // Currently mRanges can not be used here since it isn't aware of the variation sequence. 354 for (size_t i = 0; i < mVSFamilyVec.size(); i++) { 355 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { 356 return true; 357 } 358 } 359 360 // TODO: We can remove this lock by precomputing color emoji information. 361 android::AutoMutex _l(gMinikinLock); 362 363 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true 364 // for <char, text presentation selector> case since we have special fallback rule for the 365 // sequence. Note that we don't need to restrict this to already standardized variation 366 // sequences, since Unicode is adding variation sequences more frequently now and may even move 367 // towards allowing text and emoji variation selectors on any character. 368 if (variationSelector == TEXT_STYLE_VS) { 369 for (size_t i = 0; i < mFamilies.size(); ++i) { 370 if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) { 371 return true; 372 } 373 } 374 } 375 376 return false; 377 } 378 379 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD; 380 381 void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, 382 vector<Run>* result) const { 383 const uint32_t langListId = style.getLanguageListId(); 384 int variant = style.getVariant(); 385 const FontFamily* lastFamily = nullptr; 386 Run* run = nullptr; 387 388 if (string_size == 0) { 389 return; 390 } 391 392 const uint32_t kEndOfString = 0xFFFFFFFF; 393 394 uint32_t nextCh = 0; 395 uint32_t prevCh = 0; 396 size_t nextUtf16Pos = 0; 397 size_t readLength = 0; 398 U16_NEXT(string, readLength, string_size, nextCh); 399 if (U_IS_SURROGATE(nextCh)) { 400 nextCh = REPLACEMENT_CHARACTER; 401 } 402 403 do { 404 const uint32_t ch = nextCh; 405 const size_t utf16Pos = nextUtf16Pos; 406 nextUtf16Pos = readLength; 407 if (readLength < string_size) { 408 U16_NEXT(string, readLength, string_size, nextCh); 409 if (U_IS_SURROGATE(nextCh)) { 410 nextCh = REPLACEMENT_CHARACTER; 411 } 412 } else { 413 nextCh = kEndOfString; 414 } 415 416 bool shouldContinueRun = false; 417 if (doesNotNeedFontSupport(ch)) { 418 // Always continue if the character is a format character not needed to be in the font. 419 shouldContinueRun = true; 420 } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) { 421 // Continue using existing font as long as it has coverage and is whitelisted. 422 shouldContinueRun = lastFamily->getCoverage().get(ch); 423 } 424 425 if (!shouldContinueRun) { 426 const std::shared_ptr<FontFamily>& family = getFamilyForChar( 427 ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant); 428 if (utf16Pos == 0 || family.get() != lastFamily) { 429 size_t start = utf16Pos; 430 // Workaround for combining marks and emoji modifiers until we implement 431 // per-cluster font selection: if a combining mark or an emoji modifier is found in 432 // a different font that also supports the previous character, attach previous 433 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is 434 // handled properly by this since it's a combining mark too. 435 if (utf16Pos != 0 && 436 (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) && 437 family != nullptr && family->getCoverage().get(prevCh)) { 438 const size_t prevChLength = U16_LENGTH(prevCh); 439 if (run != nullptr) { 440 run->end -= prevChLength; 441 if (run->start == run->end) { 442 result->pop_back(); 443 } 444 } 445 start -= prevChLength; 446 } 447 if (lastFamily == nullptr) { 448 // This is the first family ever assigned. We are either seeing the very first 449 // character (which means start would already be zero), or we have only seen 450 // characters that don't need any font support (which means we need to adjust 451 // start to be 0 to include those characters). 452 start = 0; 453 } 454 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0}); 455 run = &result->back(); 456 lastFamily = family.get(); 457 } 458 } 459 prevCh = ch; 460 if (run != nullptr) { 461 run->end = nextUtf16Pos; // exclusive 462 } 463 } while (nextCh != kEndOfString); 464 465 if (lastFamily == nullptr) { 466 // No character needed any font support, so it doesn't really matter which font they end up 467 // getting displayed in. We put the whole string in one run, using the first font. 468 result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)}); 469 } 470 } 471 472 FakedFont FontCollection::baseFontFaked(FontStyle style) { 473 return mFamilies[0]->getClosestMatch(style); 474 } 475 476 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation( 477 const std::vector<FontVariation>& variations) { 478 if (variations.empty() || mSupportedAxes.empty()) { 479 return nullptr; 480 } 481 482 bool hasSupportedAxis = false; 483 for (const FontVariation& variation : variations) { 484 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { 485 hasSupportedAxis = true; 486 break; 487 } 488 } 489 if (!hasSupportedAxis) { 490 // None of variation axes are supported by this font collection. 491 return nullptr; 492 } 493 494 std::vector<std::shared_ptr<FontFamily> > families; 495 for (const std::shared_ptr<FontFamily>& family : mFamilies) { 496 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations); 497 if (newFamily) { 498 families.push_back(newFamily); 499 } else { 500 families.push_back(family); 501 } 502 } 503 504 return std::shared_ptr<FontCollection>(new FontCollection(families)); 505 } 506 507 uint32_t FontCollection::getId() const { 508 return mId; 509 } 510 511 } // namespace minikin 512