1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "Minikin" 18 19 #include "minikin/FontCollection.h" 20 21 #include <algorithm> 22 23 #include <log/log.h> 24 #include <unicode/unistr.h> 25 #include <unicode/unorm2.h> 26 27 #include "minikin/Emoji.h" 28 29 #include "Locale.h" 30 #include "LocaleListCache.h" 31 #include "MinikinInternal.h" 32 33 using std::vector; 34 35 namespace minikin { 36 37 template <typename T> 38 static inline T max(T a, T b) { 39 return a > b ? a : b; 40 } 41 42 const uint32_t EMOJI_STYLE_VS = 0xFE0F; 43 const uint32_t TEXT_STYLE_VS = 0xFE0E; 44 45 static std::atomic<uint32_t> gNextCollectionId = {0}; 46 47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) { 48 std::vector<std::shared_ptr<FontFamily>> typefaces; 49 typefaces.push_back(typeface); 50 init(typefaces); 51 } 52 53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) { 54 init(typefaces); 55 } 56 57 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) { 58 mId = gNextCollectionId++; 59 vector<uint32_t> lastChar; 60 size_t nTypefaces = typefaces.size(); 61 const FontStyle defaultStyle; 62 for (size_t i = 0; i < nTypefaces; i++) { 63 const std::shared_ptr<FontFamily>& family = typefaces[i]; 64 if (family->getClosestMatch(defaultStyle).font == nullptr) { 65 continue; 66 } 67 const SparseBitSet& coverage = family->getCoverage(); 68 mFamilies.push_back(family); // emplace_back would be better 69 if (family->hasVSTable()) { 70 mVSFamilyVec.push_back(family); 71 } 72 mMaxChar = max(mMaxChar, coverage.length()); 73 lastChar.push_back(coverage.nextSetBit(0)); 74 75 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes(); 76 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); 77 } 78 nTypefaces = mFamilies.size(); 79 MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface"); 80 MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT, 81 "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT); 82 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; 83 // TODO: Use variation selector map for mRanges construction. 84 // A font can have a glyph for a base code point and variation selector pair but no glyph for 85 // the base code point without variation selector. The family won't be listed in the range in 86 // this case. 87 for (size_t i = 0; i < nPages; i++) { 88 Range dummy; 89 mRanges.push_back(dummy); 90 Range* range = &mRanges.back(); 91 range->start = mFamilyVec.size(); 92 for (size_t j = 0; j < nTypefaces; j++) { 93 if (lastChar[j] < (i + 1) << kLogCharsPerPage) { 94 const std::shared_ptr<FontFamily>& family = mFamilies[j]; 95 mFamilyVec.push_back(static_cast<uint8_t>(j)); 96 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage); 97 lastChar[j] = nextChar; 98 } 99 } 100 range->end = mFamilyVec.size(); 101 } 102 // See the comment in Range for more details. 103 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF, 104 "Exceeded the maximum indexable cmap coverage."); 105 } 106 107 // Special scores for the font fallback. 108 const uint32_t kUnsupportedFontScore = 0; 109 const uint32_t kFirstFontScore = UINT32_MAX; 110 111 // Calculates a font score. 112 // The score of the font family is based on three subscores. 113 // - Coverage Score: How well the font family covers the given character or variation sequence. 114 // - Locale Score: How well the font family is appropriate for the locale. 115 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the 116 // one in BCP47. This is our own font variant (e.g., elegant, compact). 117 // 118 // Then, there is a priority for these three subscores as follow: 119 // Coverage Score > Locale Score > Variant Score 120 // The returned score reflects this priority order. 121 // 122 // Note that there are two special scores. 123 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its 124 // base character. 125 // - kFirstFontScore: When the font is the first font family in the collection and it supports the 126 // given character or variation sequence. 127 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FontFamily::Variant variant, 128 uint32_t localeListId, 129 const std::shared_ptr<FontFamily>& fontFamily) const { 130 const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily); 131 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) { 132 // No need to calculate other scores. 133 return coverageScore; 134 } 135 136 const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily); 137 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); 138 139 // Subscores are encoded into 31 bits representation to meet the subscore priority. 140 // The highest 2 bits are for coverage score, then following 28 bits are for locale score, 141 // then the last 1 bit is for variant score. 142 return coverageScore << 29 | localeScore << 1 | variantScore; 143 } 144 145 // Calculates a font score based on variation sequence coverage. 146 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base 147 // character. 148 // - Returns kFirstFontScore if the font family is the first font family in the collection and it 149 // supports the given character or variation sequence. 150 // - Returns 3 if the font family supports the variation sequence. 151 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font. 152 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font. 153 // - Returns 1 if the variation selector is not specified or if the font family only supports the 154 // variation sequence's base character. 155 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId, 156 const std::shared_ptr<FontFamily>& fontFamily) const { 157 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); 158 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) { 159 // The font doesn't support either variation sequence or even the base character. 160 return kUnsupportedFontScore; 161 } 162 163 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { 164 // If the first font family supports the given character or variation sequence, always use 165 // it. 166 return kFirstFontScore; 167 } 168 169 if (vs != 0 && hasVSGlyph) { 170 return 3; 171 } 172 173 bool colorEmojiRequest; 174 if (vs == EMOJI_STYLE_VS) { 175 colorEmojiRequest = true; 176 } else if (vs == TEXT_STYLE_VS) { 177 colorEmojiRequest = false; 178 } else { 179 switch (LocaleListCache::getById(localeListId).getEmojiStyle()) { 180 case EmojiStyle::EMOJI: 181 colorEmojiRequest = true; 182 break; 183 case EmojiStyle::TEXT: 184 colorEmojiRequest = false; 185 break; 186 case EmojiStyle::EMPTY: 187 case EmojiStyle::DEFAULT: 188 default: 189 // Do not give any extra score for the default emoji style. 190 return 1; 191 break; 192 } 193 } 194 195 return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1; 196 } 197 198 // Calculate font scores based on the script matching, subtag matching and primary locale matching. 199 // 200 // 1. If only the font's language matches or there is no matches between requested font and 201 // supported font, then the font obtains a score of 0. 202 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script, 203 // a match in subtag gets a score of 2 and a match in scripts gains a score of 1. 204 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while 205 // language-and-script obtains a socre of 3 with the same reason above. 206 // 207 // If two locales in the requested list have the same locale score, the font matching with higher 208 // priority locale gets a higher score. For example, in the case the user requested locale list is 209 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of 210 // "en-Latn". 211 // 212 // To achieve score calculation with priorities, the locale score is determined as follows: 213 // LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1) 214 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching 215 // score. The possible values of s(i) are 0, 1, 2, 3 and 4. 216 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId, 217 const FontFamily& fontFamily) { 218 const LocaleList& localeList = LocaleListCache::getById(userLocaleListId); 219 const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId()); 220 221 const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT); 222 uint32_t score = 0; 223 for (size_t i = 0; i < maxCompareNum; ++i) { 224 score = score * 5u + localeList[i].calcScoreFor(fontLocaleList); 225 } 226 return score; 227 } 228 229 // Calculates a font score based on variant ("compact" or "elegant") matching. 230 // - Returns 1 if the font doesn't have variant or the variant matches with the text style. 231 // - No score if the font has a variant but it doesn't match with the text style. 232 uint32_t FontCollection::calcVariantMatchingScore(FontFamily::Variant variant, 233 const FontFamily& fontFamily) { 234 const FontFamily::Variant familyVariant = fontFamily.variant(); 235 if (familyVariant == FontFamily::Variant::DEFAULT) { 236 return 1; 237 } 238 if (familyVariant == variant) { 239 return 1; 240 } 241 if (variant == FontFamily::Variant::DEFAULT && familyVariant == FontFamily::Variant::COMPACT) { 242 // If default is requested, prefer compat variation. 243 return 1; 244 } 245 return 0; 246 } 247 248 // Implement heuristic for choosing best-match font. Here are the rules: 249 // 1. If first font in the collection has the character, it wins. 250 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail. 251 // 3. Highest score wins, with ties resolved to the first font. 252 // This method never returns nullptr. 253 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar( 254 uint32_t ch, uint32_t vs, uint32_t localeListId, FontFamily::Variant variant) const { 255 if (ch >= mMaxChar) { 256 return mFamilies[0]; 257 } 258 259 Range range = mRanges[ch >> kLogCharsPerPage]; 260 261 if (vs != 0) { 262 range = {0, static_cast<uint16_t>(mFamilies.size())}; 263 } 264 265 int bestFamilyIndex = -1; 266 uint32_t bestScore = kUnsupportedFontScore; 267 for (size_t i = range.start; i < range.end; i++) { 268 const std::shared_ptr<FontFamily>& family = 269 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i]; 270 const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family); 271 if (score == kFirstFontScore) { 272 // If the first font family supports the given character or variation sequence, always 273 // use it. 274 return family; 275 } 276 if (score > bestScore) { 277 bestScore = score; 278 bestFamilyIndex = i; 279 } 280 } 281 if (bestFamilyIndex == -1) { 282 UErrorCode errorCode = U_ZERO_ERROR; 283 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); 284 if (U_SUCCESS(errorCode)) { 285 UChar decomposed[4]; 286 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); 287 if (U_SUCCESS(errorCode) && len > 0) { 288 int off = 0; 289 U16_NEXT_UNSAFE(decomposed, off, ch); 290 return getFamilyForChar(ch, vs, localeListId, variant); 291 } 292 } 293 return mFamilies[0]; 294 } 295 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex]; 296 } 297 298 // Characters where we want to continue using existing font run for (or stick to the next run if 299 // they start a string), even if the font does not support them explicitly. These are handled 300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's 301 // usually meaningless to switch to a different font to display them. 302 static bool doesNotNeedFontSupport(uint32_t c) { 303 return c == 0x00AD // SOFT HYPHEN 304 || c == 0x034F // COMBINING GRAPHEME JOINER 305 || c == 0x061C // ARABIC LETTER MARK 306 || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK 307 || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 308 || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE 309 || c == 0xFEFF // BYTE ORDER MARK 310 || isVariationSelector(c); 311 } 312 313 // Characters where we want to continue using existing font run instead of 314 // recomputing the best match in the fallback list. 315 static const uint32_t stickyWhitelist[] = { 316 '!', ',', '-', '.', ':', ';', '?', 317 0x00A0, // NBSP 318 0x2010, // HYPHEN 319 0x2011, // NB_HYPHEN 320 0x202F, // NNBSP 321 0x2640, // FEMALE_SIGN, 322 0x2642, // MALE_SIGN, 323 0x2695, // STAFF_OF_AESCULAPIUS 324 }; 325 326 static bool isStickyWhitelisted(uint32_t c) { 327 for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { 328 if (stickyWhitelist[i] == c) return true; 329 } 330 return false; 331 } 332 333 static inline bool isCombining(uint32_t c) { 334 return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0; 335 } 336 337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint, 338 uint32_t variationSelector) const { 339 if (!isVariationSelector(variationSelector)) { 340 return false; 341 } 342 if (baseCodepoint >= mMaxChar) { 343 return false; 344 } 345 346 // Currently mRanges can not be used here since it isn't aware of the variation sequence. 347 for (size_t i = 0; i < mVSFamilyVec.size(); i++) { 348 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { 349 return true; 350 } 351 } 352 353 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true 354 // for <char, text presentation selector> case since we have special fallback rule for the 355 // sequence. Note that we don't need to restrict this to already standardized variation 356 // sequences, since Unicode is adding variation sequences more frequently now and may even move 357 // towards allowing text and emoji variation selectors on any character. 358 if (variationSelector == TEXT_STYLE_VS) { 359 for (size_t i = 0; i < mFamilies.size(); ++i) { 360 if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) { 361 return true; 362 } 363 } 364 } 365 366 return false; 367 } 368 369 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD; 370 371 void FontCollection::itemize(const uint16_t* string, size_t string_size, const MinikinPaint& paint, 372 vector<Run>* result) const { 373 const FontFamily::Variant familyVariant = paint.familyVariant; 374 const FontStyle style = paint.fontStyle; 375 const uint32_t localeListId = paint.localeListId; 376 377 const FontFamily* lastFamily = nullptr; 378 Run* run = nullptr; 379 380 if (string_size == 0) { 381 return; 382 } 383 384 const uint32_t kEndOfString = 0xFFFFFFFF; 385 386 uint32_t nextCh = 0; 387 uint32_t prevCh = 0; 388 size_t nextUtf16Pos = 0; 389 size_t readLength = 0; 390 U16_NEXT(string, readLength, string_size, nextCh); 391 if (U_IS_SURROGATE(nextCh)) { 392 nextCh = REPLACEMENT_CHARACTER; 393 } 394 395 do { 396 const uint32_t ch = nextCh; 397 const size_t utf16Pos = nextUtf16Pos; 398 nextUtf16Pos = readLength; 399 if (readLength < string_size) { 400 U16_NEXT(string, readLength, string_size, nextCh); 401 if (U_IS_SURROGATE(nextCh)) { 402 nextCh = REPLACEMENT_CHARACTER; 403 } 404 } else { 405 nextCh = kEndOfString; 406 } 407 408 bool shouldContinueRun = false; 409 if (doesNotNeedFontSupport(ch)) { 410 // Always continue if the character is a format character not needed to be in the font. 411 shouldContinueRun = true; 412 } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) { 413 // Continue using existing font as long as it has coverage and is whitelisted. 414 shouldContinueRun = lastFamily->getCoverage().get(ch); 415 } 416 417 if (!shouldContinueRun) { 418 const std::shared_ptr<FontFamily>& family = getFamilyForChar( 419 ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant); 420 if (utf16Pos == 0 || family.get() != lastFamily) { 421 size_t start = utf16Pos; 422 // Workaround for combining marks and emoji modifiers until we implement 423 // per-cluster font selection: if a combining mark or an emoji modifier is found in 424 // a different font that also supports the previous character, attach previous 425 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is 426 // handled properly by this since it's a combining mark too. 427 if (utf16Pos != 0 && 428 (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) && 429 family != nullptr && family->getCoverage().get(prevCh)) { 430 const size_t prevChLength = U16_LENGTH(prevCh); 431 if (run != nullptr) { 432 run->end -= prevChLength; 433 if (run->start == run->end) { 434 result->pop_back(); 435 } 436 } 437 start -= prevChLength; 438 } 439 if (lastFamily == nullptr) { 440 // This is the first family ever assigned. We are either seeing the very first 441 // character (which means start would already be zero), or we have only seen 442 // characters that don't need any font support (which means we need to adjust 443 // start to be 0 to include those characters). 444 start = 0; 445 } 446 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0}); 447 run = &result->back(); 448 lastFamily = family.get(); 449 } 450 } 451 prevCh = ch; 452 if (run != nullptr) { 453 run->end = nextUtf16Pos; // exclusive 454 } 455 } while (nextCh != kEndOfString); 456 457 if (lastFamily == nullptr) { 458 // No character needed any font support, so it doesn't really matter which font they end up 459 // getting displayed in. We put the whole string in one run, using the first font. 460 result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)}); 461 } 462 } 463 464 FakedFont FontCollection::baseFontFaked(FontStyle style) { 465 return mFamilies[0]->getClosestMatch(style); 466 } 467 468 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation( 469 const std::vector<FontVariation>& variations) { 470 if (variations.empty() || mSupportedAxes.empty()) { 471 return nullptr; 472 } 473 474 bool hasSupportedAxis = false; 475 for (const FontVariation& variation : variations) { 476 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { 477 hasSupportedAxis = true; 478 break; 479 } 480 } 481 if (!hasSupportedAxis) { 482 // None of variation axes are supported by this font collection. 483 return nullptr; 484 } 485 486 std::vector<std::shared_ptr<FontFamily>> families; 487 for (const std::shared_ptr<FontFamily>& family : mFamilies) { 488 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations); 489 if (newFamily) { 490 families.push_back(newFamily); 491 } else { 492 families.push_back(family); 493 } 494 } 495 496 return std::shared_ptr<FontCollection>(new FontCollection(families)); 497 } 498 499 uint32_t FontCollection::getId() const { 500 return mId; 501 } 502 503 } // namespace minikin 504