Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "Minikin"
     18 
#include "minikin/FontCollection.h"

#include <algorithm>
#include <utility>

#include <log/log.h>
#include <unicode/unistr.h>
#include <unicode/unorm2.h>

#include "minikin/Emoji.h"

#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
     32 
     33 using std::vector;
     34 
     35 namespace minikin {
     36 
     37 template <typename T>
     38 static inline T max(T a, T b) {
     39     return a > b ? a : b;
     40 }
     41 
     42 const uint32_t EMOJI_STYLE_VS = 0xFE0F;
     43 const uint32_t TEXT_STYLE_VS = 0xFE0E;
     44 
     45 static std::atomic<uint32_t> gNextCollectionId = {0};
     46 
     47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
     48     std::vector<std::shared_ptr<FontFamily>> typefaces;
     49     typefaces.push_back(typeface);
     50     init(typefaces);
     51 }
     52 
     53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
     54     init(typefaces);
     55 }
     56 
     57 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
     58     mId = gNextCollectionId++;
     59     vector<uint32_t> lastChar;
     60     size_t nTypefaces = typefaces.size();
     61     const FontStyle defaultStyle;
     62     for (size_t i = 0; i < nTypefaces; i++) {
     63         const std::shared_ptr<FontFamily>& family = typefaces[i];
     64         if (family->getClosestMatch(defaultStyle).font == nullptr) {
     65             continue;
     66         }
     67         const SparseBitSet& coverage = family->getCoverage();
     68         mFamilies.push_back(family);  // emplace_back would be better
     69         if (family->hasVSTable()) {
     70             mVSFamilyVec.push_back(family);
     71         }
     72         mMaxChar = max(mMaxChar, coverage.length());
     73         lastChar.push_back(coverage.nextSetBit(0));
     74 
     75         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
     76         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
     77     }
     78     nTypefaces = mFamilies.size();
     79     MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
     80     MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
     81                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
     82     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
     83     // TODO: Use variation selector map for mRanges construction.
     84     // A font can have a glyph for a base code point and variation selector pair but no glyph for
     85     // the base code point without variation selector. The family won't be listed in the range in
     86     // this case.
     87     for (size_t i = 0; i < nPages; i++) {
     88         Range dummy;
     89         mRanges.push_back(dummy);
     90         Range* range = &mRanges.back();
     91         range->start = mFamilyVec.size();
     92         for (size_t j = 0; j < nTypefaces; j++) {
     93             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
     94                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
     95                 mFamilyVec.push_back(static_cast<uint8_t>(j));
     96                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
     97                 lastChar[j] = nextChar;
     98             }
     99         }
    100         range->end = mFamilyVec.size();
    101     }
    102     // See the comment in Range for more details.
    103     LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
    104                         "Exceeded the maximum indexable cmap coverage.");
    105 }
    106 
    107 // Special scores for the font fallback.
    108 const uint32_t kUnsupportedFontScore = 0;
    109 const uint32_t kFirstFontScore = UINT32_MAX;
    110 
    111 // Calculates a font score.
    112 // The score of the font family is based on three subscores.
    113 //  - Coverage Score: How well the font family covers the given character or variation sequence.
    114 //  - Locale Score: How well the font family is appropriate for the locale.
    115 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
    116 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
    117 //
    118 // Then, there is a priority for these three subscores as follow:
    119 //   Coverage Score > Locale Score > Variant Score
    120 // The returned score reflects this priority order.
    121 //
    122 // Note that there are two special scores.
    123 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
    124 //    base character.
    125 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
    126 //    given character or variation sequence.
    127 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FontFamily::Variant variant,
    128                                          uint32_t localeListId,
    129                                          const std::shared_ptr<FontFamily>& fontFamily) const {
    130     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
    131     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
    132         // No need to calculate other scores.
    133         return coverageScore;
    134     }
    135 
    136     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
    137     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
    138 
    139     // Subscores are encoded into 31 bits representation to meet the subscore priority.
    140     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
    141     // then the last 1 bit is for variant score.
    142     return coverageScore << 29 | localeScore << 1 | variantScore;
    143 }
    144 
    145 // Calculates a font score based on variation sequence coverage.
    146 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
    147 //   character.
    148 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
    149 //   supports the given character or variation sequence.
    150 // - Returns 3 if the font family supports the variation sequence.
    151 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
    152 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
    153 // - Returns 1 if the variation selector is not specified or if the font family only supports the
    154 //   variation sequence's base character.
    155 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
    156                                            const std::shared_ptr<FontFamily>& fontFamily) const {
    157     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
    158     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
    159         // The font doesn't support either variation sequence or even the base character.
    160         return kUnsupportedFontScore;
    161     }
    162 
    163     if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
    164         // If the first font family supports the given character or variation sequence, always use
    165         // it.
    166         return kFirstFontScore;
    167     }
    168 
    169     if (vs != 0 && hasVSGlyph) {
    170         return 3;
    171     }
    172 
    173     bool colorEmojiRequest;
    174     if (vs == EMOJI_STYLE_VS) {
    175         colorEmojiRequest = true;
    176     } else if (vs == TEXT_STYLE_VS) {
    177         colorEmojiRequest = false;
    178     } else {
    179         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
    180             case EmojiStyle::EMOJI:
    181                 colorEmojiRequest = true;
    182                 break;
    183             case EmojiStyle::TEXT:
    184                 colorEmojiRequest = false;
    185                 break;
    186             case EmojiStyle::EMPTY:
    187             case EmojiStyle::DEFAULT:
    188             default:
    189                 // Do not give any extra score for the default emoji style.
    190                 return 1;
    191                 break;
    192         }
    193     }
    194 
    195     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
    196 }
    197 
    198 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
    199 //
    200 // 1. If only the font's language matches or there is no matches between requested font and
    201 //    supported font, then the font obtains a score of 0.
    202 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
    203 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
    204 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
    205 //    language-and-script obtains a socre of 3 with the same reason above.
    206 //
    207 // If two locales in the requested list have the same locale score, the font matching with higher
    208 // priority locale gets a higher score. For example, in the case the user requested locale list is
    209 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
    210 // "en-Latn".
    211 //
    212 // To achieve score calculation with priorities, the locale score is determined as follows:
    213 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
    214 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
    215 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
    216 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
    217                                                  const FontFamily& fontFamily) {
    218     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
    219     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
    220 
    221     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
    222     uint32_t score = 0;
    223     for (size_t i = 0; i < maxCompareNum; ++i) {
    224         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
    225     }
    226     return score;
    227 }
    228 
    229 // Calculates a font score based on variant ("compact" or "elegant") matching.
    230 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
    231 //  - No score if the font has a variant but it doesn't match with the text style.
    232 uint32_t FontCollection::calcVariantMatchingScore(FontFamily::Variant variant,
    233                                                   const FontFamily& fontFamily) {
    234     const FontFamily::Variant familyVariant = fontFamily.variant();
    235     if (familyVariant == FontFamily::Variant::DEFAULT) {
    236         return 1;
    237     }
    238     if (familyVariant == variant) {
    239         return 1;
    240     }
    241     if (variant == FontFamily::Variant::DEFAULT && familyVariant == FontFamily::Variant::COMPACT) {
    242         // If default is requested, prefer compat variation.
    243         return 1;
    244     }
    245     return 0;
    246 }
    247 
    248 // Implement heuristic for choosing best-match font. Here are the rules:
    249 // 1. If first font in the collection has the character, it wins.
    250 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
    251 // 3. Highest score wins, with ties resolved to the first font.
    252 // This method never returns nullptr.
    253 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(
    254         uint32_t ch, uint32_t vs, uint32_t localeListId, FontFamily::Variant variant) const {
    255     if (ch >= mMaxChar) {
    256         return mFamilies[0];
    257     }
    258 
    259     Range range = mRanges[ch >> kLogCharsPerPage];
    260 
    261     if (vs != 0) {
    262         range = {0, static_cast<uint16_t>(mFamilies.size())};
    263     }
    264 
    265     int bestFamilyIndex = -1;
    266     uint32_t bestScore = kUnsupportedFontScore;
    267     for (size_t i = range.start; i < range.end; i++) {
    268         const std::shared_ptr<FontFamily>& family =
    269                 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
    270         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
    271         if (score == kFirstFontScore) {
    272             // If the first font family supports the given character or variation sequence, always
    273             // use it.
    274             return family;
    275         }
    276         if (score > bestScore) {
    277             bestScore = score;
    278             bestFamilyIndex = i;
    279         }
    280     }
    281     if (bestFamilyIndex == -1) {
    282         UErrorCode errorCode = U_ZERO_ERROR;
    283         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
    284         if (U_SUCCESS(errorCode)) {
    285             UChar decomposed[4];
    286             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
    287             if (U_SUCCESS(errorCode) && len > 0) {
    288                 int off = 0;
    289                 U16_NEXT_UNSAFE(decomposed, off, ch);
    290                 return getFamilyForChar(ch, vs, localeListId, variant);
    291             }
    292         }
    293         return mFamilies[0];
    294     }
    295     return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
    296 }
    297 
    298 // Characters where we want to continue using existing font run for (or stick to the next run if
    299 // they start a string), even if the font does not support them explicitly. These are handled
    300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
    301 // usually meaningless to switch to a different font to display them.
    302 static bool doesNotNeedFontSupport(uint32_t c) {
    303     return c == 0x00AD                      // SOFT HYPHEN
    304            || c == 0x034F                   // COMBINING GRAPHEME JOINER
    305            || c == 0x061C                   // ARABIC LETTER MARK
    306            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
    307            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
    308            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
    309            || c == 0xFEFF                   // BYTE ORDER MARK
    310            || isVariationSelector(c);
    311 }
    312 
    313 // Characters where we want to continue using existing font run instead of
    314 // recomputing the best match in the fallback list.
    315 static const uint32_t stickyWhitelist[] = {
    316         '!',    ',', '-', '.', ':', ';', '?',
    317         0x00A0,  // NBSP
    318         0x2010,  // HYPHEN
    319         0x2011,  // NB_HYPHEN
    320         0x202F,  // NNBSP
    321         0x2640,  // FEMALE_SIGN,
    322         0x2642,  // MALE_SIGN,
    323         0x2695,  // STAFF_OF_AESCULAPIUS
    324 };
    325 
    326 static bool isStickyWhitelisted(uint32_t c) {
    327     for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
    328         if (stickyWhitelist[i] == c) return true;
    329     }
    330     return false;
    331 }
    332 
    333 static inline bool isCombining(uint32_t c) {
    334     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
    335 }
    336 
    337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
    338                                           uint32_t variationSelector) const {
    339     if (!isVariationSelector(variationSelector)) {
    340         return false;
    341     }
    342     if (baseCodepoint >= mMaxChar) {
    343         return false;
    344     }
    345 
    346     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
    347     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
    348         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
    349             return true;
    350         }
    351     }
    352 
    353     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
    354     // for <char, text presentation selector> case since we have special fallback rule for the
    355     // sequence. Note that we don't need to restrict this to already standardized variation
    356     // sequences, since Unicode is adding variation sequences more frequently now and may even move
    357     // towards allowing text and emoji variation selectors on any character.
    358     if (variationSelector == TEXT_STYLE_VS) {
    359         for (size_t i = 0; i < mFamilies.size(); ++i) {
    360             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
    361                 return true;
    362             }
    363         }
    364     }
    365 
    366     return false;
    367 }
    368 
    369 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
    370 
    371 void FontCollection::itemize(const uint16_t* string, size_t string_size, const MinikinPaint& paint,
    372                              vector<Run>* result) const {
    373     const FontFamily::Variant familyVariant = paint.familyVariant;
    374     const FontStyle style = paint.fontStyle;
    375     const uint32_t localeListId = paint.localeListId;
    376 
    377     const FontFamily* lastFamily = nullptr;
    378     Run* run = nullptr;
    379 
    380     if (string_size == 0) {
    381         return;
    382     }
    383 
    384     const uint32_t kEndOfString = 0xFFFFFFFF;
    385 
    386     uint32_t nextCh = 0;
    387     uint32_t prevCh = 0;
    388     size_t nextUtf16Pos = 0;
    389     size_t readLength = 0;
    390     U16_NEXT(string, readLength, string_size, nextCh);
    391     if (U_IS_SURROGATE(nextCh)) {
    392         nextCh = REPLACEMENT_CHARACTER;
    393     }
    394 
    395     do {
    396         const uint32_t ch = nextCh;
    397         const size_t utf16Pos = nextUtf16Pos;
    398         nextUtf16Pos = readLength;
    399         if (readLength < string_size) {
    400             U16_NEXT(string, readLength, string_size, nextCh);
    401             if (U_IS_SURROGATE(nextCh)) {
    402                 nextCh = REPLACEMENT_CHARACTER;
    403             }
    404         } else {
    405             nextCh = kEndOfString;
    406         }
    407 
    408         bool shouldContinueRun = false;
    409         if (doesNotNeedFontSupport(ch)) {
    410             // Always continue if the character is a format character not needed to be in the font.
    411             shouldContinueRun = true;
    412         } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
    413             // Continue using existing font as long as it has coverage and is whitelisted.
    414             shouldContinueRun = lastFamily->getCoverage().get(ch);
    415         }
    416 
    417         if (!shouldContinueRun) {
    418             const std::shared_ptr<FontFamily>& family = getFamilyForChar(
    419                     ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
    420             if (utf16Pos == 0 || family.get() != lastFamily) {
    421                 size_t start = utf16Pos;
    422                 // Workaround for combining marks and emoji modifiers until we implement
    423                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
    424                 // a different font that also supports the previous character, attach previous
    425                 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
    426                 // handled properly by this since it's a combining mark too.
    427                 if (utf16Pos != 0 &&
    428                     (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
    429                     family != nullptr && family->getCoverage().get(prevCh)) {
    430                     const size_t prevChLength = U16_LENGTH(prevCh);
    431                     if (run != nullptr) {
    432                         run->end -= prevChLength;
    433                         if (run->start == run->end) {
    434                             result->pop_back();
    435                         }
    436                     }
    437                     start -= prevChLength;
    438                 }
    439                 if (lastFamily == nullptr) {
    440                     // This is the first family ever assigned. We are either seeing the very first
    441                     // character (which means start would already be zero), or we have only seen
    442                     // characters that don't need any font support (which means we need to adjust
    443                     // start to be 0 to include those characters).
    444                     start = 0;
    445                 }
    446                 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
    447                 run = &result->back();
    448                 lastFamily = family.get();
    449             }
    450         }
    451         prevCh = ch;
    452         if (run != nullptr) {
    453             run->end = nextUtf16Pos;  // exclusive
    454         }
    455     } while (nextCh != kEndOfString);
    456 
    457     if (lastFamily == nullptr) {
    458         // No character needed any font support, so it doesn't really matter which font they end up
    459         // getting displayed in. We put the whole string in one run, using the first font.
    460         result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
    461     }
    462 }
    463 
    464 FakedFont FontCollection::baseFontFaked(FontStyle style) {
    465     return mFamilies[0]->getClosestMatch(style);
    466 }
    467 
    468 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
    469         const std::vector<FontVariation>& variations) {
    470     if (variations.empty() || mSupportedAxes.empty()) {
    471         return nullptr;
    472     }
    473 
    474     bool hasSupportedAxis = false;
    475     for (const FontVariation& variation : variations) {
    476         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
    477             hasSupportedAxis = true;
    478             break;
    479         }
    480     }
    481     if (!hasSupportedAxis) {
    482         // None of variation axes are supported by this font collection.
    483         return nullptr;
    484     }
    485 
    486     std::vector<std::shared_ptr<FontFamily>> families;
    487     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
    488         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
    489         if (newFamily) {
    490             families.push_back(newFamily);
    491         } else {
    492             families.push_back(family);
    493         }
    494     }
    495 
    496     return std::shared_ptr<FontCollection>(new FontCollection(families));
    497 }
    498 
    499 uint32_t FontCollection::getId() const {
    500     return mId;
    501 }
    502 
    503 }  // namespace minikin
    504