Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // #define VERBOSE_DEBUG
     18 
     19 #define LOG_TAG "Minikin"
     20 
     21 #include <algorithm>
     22 
     23 #include <log/log.h>
     24 #include "unicode/unistr.h"
     25 #include "unicode/unorm2.h"
     26 
     27 #include "FontLanguage.h"
     28 #include "FontLanguageListCache.h"
     29 #include "MinikinInternal.h"
     30 #include <minikin/Emoji.h>
     31 #include <minikin/FontCollection.h>
     32 
     33 using std::vector;
     34 
     35 namespace minikin {
     36 
     37 template <typename T>
     38 static inline T max(T a, T b) {
     39     return a>b ? a : b;
     40 }
     41 
// Variation selectors that request a particular presentation of the preceding base character.
// calcCoverageScore() and hasVariationSelector() give these two selectors special treatment.
const uint32_t EMOJI_STYLE_VS = 0xFE0F;  // U+FE0F: emoji (color) presentation
const uint32_t TEXT_STYLE_VS = 0xFE0E;   // U+FE0E: text presentation

// Counter from which init() draws the id of each collection (mId = sNextId++).
uint32_t FontCollection::sNextId = 0;
     46 
     47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
     48     std::vector<std::shared_ptr<FontFamily>> typefaces;
     49     typefaces.push_back(typeface);
     50     init(typefaces);
     51 }
     52 
     53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) :
     54     mMaxChar(0) {
     55     init(typefaces);
     56 }
     57 
     58 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
     59     android::AutoMutex _l(gMinikinLock);
     60     mId = sNextId++;
     61     vector<uint32_t> lastChar;
     62     size_t nTypefaces = typefaces.size();
     63 #ifdef VERBOSE_DEBUG
     64     ALOGD("nTypefaces = %zd\n", nTypefaces);
     65 #endif
     66     const FontStyle defaultStyle;
     67     for (size_t i = 0; i < nTypefaces; i++) {
     68         const std::shared_ptr<FontFamily>& family = typefaces[i];
     69         if (family->getClosestMatch(defaultStyle).font == nullptr) {
     70             continue;
     71         }
     72         const SparseBitSet& coverage = family->getCoverage();
     73         mFamilies.push_back(family);  // emplace_back would be better
     74         if (family->hasVSTable()) {
     75             mVSFamilyVec.push_back(family);
     76         }
     77         mMaxChar = max(mMaxChar, coverage.length());
     78         lastChar.push_back(coverage.nextSetBit(0));
     79 
     80         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
     81         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
     82     }
     83     nTypefaces = mFamilies.size();
     84     LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
     85         "Font collection must have at least one valid typeface");
     86     LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
     87         "Font collection may only have up to 254 font families.");
     88     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
     89     // TODO: Use variation selector map for mRanges construction.
     90     // A font can have a glyph for a base code point and variation selector pair but no glyph for
     91     // the base code point without variation selector. The family won't be listed in the range in
     92     // this case.
     93     for (size_t i = 0; i < nPages; i++) {
     94         Range dummy;
     95         mRanges.push_back(dummy);
     96         Range* range = &mRanges.back();
     97 #ifdef VERBOSE_DEBUG
     98         ALOGD("i=%zd: range start = %zd\n", i, offset);
     99 #endif
    100         range->start = mFamilyVec.size();
    101         for (size_t j = 0; j < nTypefaces; j++) {
    102             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
    103                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
    104                 mFamilyVec.push_back(static_cast<uint8_t>(j));
    105                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
    106 #ifdef VERBOSE_DEBUG
    107                 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
    108 #endif
    109                 lastChar[j] = nextChar;
    110             }
    111         }
    112         range->end = mFamilyVec.size();
    113     }
    114     // See the comment in Range for more details.
    115     LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
    116         "Exceeded the maximum indexable cmap coverage.");
    117 }
    118 
// Special scores for the font fallback.
// kUnsupportedFontScore is the lowest possible score and is returned by calcCoverageScore() when
// a family supports neither the variation sequence nor its base character.
// kFirstFontScore is the highest possible score and is returned when the first family of the
// collection supports the requested character or variation sequence; getFamilyForChar() stops
// searching as soon as it sees it.
const uint32_t kUnsupportedFontScore = 0;
const uint32_t kFirstFontScore = UINT32_MAX;
    122 
    123 // Calculates a font score.
    124 // The score of the font family is based on three subscores.
    125 //  - Coverage Score: How well the font family covers the given character or variation sequence.
    126 //  - Language Score: How well the font family is appropriate for the language.
    127 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
    128 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
    129 //
    130 // Then, there is a priority for these three subscores as follow:
    131 //   Coverage Score > Language Score > Variant Score
    132 // The returned score reflects this priority order.
    133 //
    134 // Note that there are two special scores.
    135 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
    136 //    base character.
    137 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
    138 //    given character or variation sequence.
    139 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
    140         const std::shared_ptr<FontFamily>& fontFamily) const {
    141 
    142     const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
    143     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
    144         // No need to calculate other scores.
    145         return coverageScore;
    146     }
    147 
    148     const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
    149     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
    150 
    151     // Subscores are encoded into 31 bits representation to meet the subscore priority.
    152     // The highest 2 bits are for coverage score, then following 28 bits are for language score,
    153     // then the last 1 bit is for variant score.
    154     return coverageScore << 29 | languageScore << 1 | variantScore;
    155 }
    156 
    157 // Calculates a font score based on variation sequence coverage.
    158 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
    159 //   character.
    160 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
    161 //   supports the given character or variation sequence.
    162 // - Returns 3 if the font family supports the variation sequence.
    163 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
    164 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
    165 // - Returns 1 if the variation selector is not specified or if the font family only supports the
    166 //   variation sequence's base character.
    167 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs,
    168         const std::shared_ptr<FontFamily>& fontFamily) const {
    169     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
    170     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
    171         // The font doesn't support either variation sequence or even the base character.
    172         return kUnsupportedFontScore;
    173     }
    174 
    175     if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
    176         // If the first font family supports the given character or variation sequence, always use
    177         // it.
    178         return kFirstFontScore;
    179     }
    180 
    181     if (vs == 0) {
    182         return 1;
    183     }
    184 
    185     if (hasVSGlyph) {
    186         return 3;
    187     }
    188 
    189     if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
    190         const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId());
    191         bool hasEmojiFlag = false;
    192         for (size_t i = 0; i < langs.size(); ++i) {
    193             if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
    194                 hasEmojiFlag = true;
    195                 break;
    196             }
    197         }
    198 
    199         if (vs == EMOJI_STYLE_VS) {
    200             return hasEmojiFlag ? 2 : 1;
    201         } else {  // vs == TEXT_STYLE_VS
    202             return hasEmojiFlag ? 1 : 2;
    203         }
    204     }
    205     return 1;
    206 }
    207 
    208 // Calculate font scores based on the script matching, subtag matching and primary langauge matching.
    209 //
    210 // 1. If only the font's language matches or there is no matches between requested font and
    211 //    supported font, then the font obtains a score of 0.
    212 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
    213 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
    214 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
    215 //    language-and-script obtains a socre of 3 with the same reason above.
    216 //
    217 // If two languages in the requested list have the same language score, the font matching with
    218 // higher priority language gets a higher score. For example, in the case the user requested
    219 // language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
    220 // than the font of "en-Latn".
    221 //
    222 // To achieve score calculation with priorities, the language score is determined as follows:
    223 //   LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
    224 // Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
    225 // matching score. The possible values of s(i) are 0, 1, 2, 3 and 4.
    226 uint32_t FontCollection::calcLanguageMatchingScore(
    227         uint32_t userLangListId, const FontFamily& fontFamily) {
    228     const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
    229     const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId());
    230 
    231     const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
    232     uint32_t score = 0;
    233     for (size_t i = 0; i < maxCompareNum; ++i) {
    234         score = score * 5u + langList[i].calcScoreFor(fontLanguages);
    235     }
    236     return score;
    237 }
    238 
    239 // Calculates a font score based on variant ("compact" or "elegant") matching.
    240 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
    241 //  - No score if the font has a variant but it doesn't match with the text style.
    242 uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
    243     return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
    244 }
    245 
    246 // Implement heuristic for choosing best-match font. Here are the rules:
    247 // 1. If first font in the collection has the character, it wins.
    248 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
    249 // 3. Highest score wins, with ties resolved to the first font.
    250 // This method never returns nullptr.
    251 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
    252             uint32_t langListId, int variant) const {
    253     if (ch >= mMaxChar) {
    254         return mFamilies[0];
    255     }
    256 
    257     Range range = mRanges[ch >> kLogCharsPerPage];
    258 
    259     if (vs != 0) {
    260         range = { 0, static_cast<uint16_t>(mFamilies.size()) };
    261     }
    262 
    263 #ifdef VERBOSE_DEBUG
    264     ALOGD("querying range %zd:%zd\n", range.start, range.end);
    265 #endif
    266     int bestFamilyIndex = -1;
    267     uint32_t bestScore = kUnsupportedFontScore;
    268     for (size_t i = range.start; i < range.end; i++) {
    269         const std::shared_ptr<FontFamily>& family =
    270                 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
    271         const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
    272         if (score == kFirstFontScore) {
    273             // If the first font family supports the given character or variation sequence, always
    274             // use it.
    275             return family;
    276         }
    277         if (score > bestScore) {
    278             bestScore = score;
    279             bestFamilyIndex = i;
    280         }
    281     }
    282     if (bestFamilyIndex == -1) {
    283         UErrorCode errorCode = U_ZERO_ERROR;
    284         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
    285         if (U_SUCCESS(errorCode)) {
    286             UChar decomposed[4];
    287             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
    288             if (U_SUCCESS(errorCode) && len > 0) {
    289                 int off = 0;
    290                 U16_NEXT_UNSAFE(decomposed, off, ch);
    291                 return getFamilyForChar(ch, vs, langListId, variant);
    292             }
    293         }
    294         return mFamilies[0];
    295     }
    296     return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
    297 }
    298 
    299 // Characters where we want to continue using existing font run for (or stick to the next run if
    300 // they start a string), even if the font does not support them explicitly. These are handled
    301 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
    302 // usually meaningless to switch to a different font to display them.
    303 static bool doesNotNeedFontSupport(uint32_t c) {
    304     return c == 0x00AD // SOFT HYPHEN
    305             || c == 0x034F // COMBINING GRAPHEME JOINER
    306             || c == 0x061C // ARABIC LETTER MARK
    307             || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
    308             || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
    309             || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
    310             || c == 0xFEFF // BYTE ORDER MARK
    311             || isVariationSelector(c);
    312 }
    313 
    314 // Characters where we want to continue using existing font run instead of
    315 // recomputing the best match in the fallback list.
    316 static const uint32_t stickyWhitelist[] = {
    317     '!',
    318     ',',
    319     '-',
    320     '.',
    321     ':',
    322     ';',
    323     '?',
    324     0x00A0, // NBSP
    325     0x2010, // HYPHEN
    326     0x2011, // NB_HYPHEN
    327     0x202F, // NNBSP
    328     0x2640, // FEMALE_SIGN,
    329     0x2642, // MALE_SIGN,
    330     0x2695, // STAFF_OF_AESCULAPIUS
    331 };
    332 
    333 static bool isStickyWhitelisted(uint32_t c) {
    334     for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) {
    335         if (stickyWhitelist[i] == c) return true;
    336     }
    337     return false;
    338 }
    339 
    340 static inline bool isCombining(uint32_t c) {
    341     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
    342 }
    343 
    344 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
    345         uint32_t variationSelector) const {
    346     if (!isVariationSelector(variationSelector)) {
    347         return false;
    348     }
    349     if (baseCodepoint >= mMaxChar) {
    350         return false;
    351     }
    352 
    353     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
    354     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
    355         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
    356             return true;
    357         }
    358     }
    359 
    360     // TODO: We can remove this lock by precomputing color emoji information.
    361     android::AutoMutex _l(gMinikinLock);
    362 
    363     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
    364     // for <char, text presentation selector> case since we have special fallback rule for the
    365     // sequence. Note that we don't need to restrict this to already standardized variation
    366     // sequences, since Unicode is adding variation sequences more frequently now and may even move
    367     // towards allowing text and emoji variation selectors on any character.
    368     if (variationSelector == TEXT_STYLE_VS) {
    369         for (size_t i = 0; i < mFamilies.size(); ++i) {
    370             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
    371                 return true;
    372             }
    373         }
    374     }
    375 
    376     return false;
    377 }
    378 
// U+FFFD; itemize() substitutes it for any surrogate code point that U16_NEXT hands back.
constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
    380 
// Splits a UTF-16 string into runs that are each rendered with a single font.
//
// Parameters:
//   string      - UTF-16 code units of the text.
//   string_size - number of code units in |string|.
//   style       - requested style; supplies the language list id and the variant used for family
//                 selection, and is passed to getClosestMatch() to pick the font of each run.
//   result      - output; runs are appended to it. Each run holds the chosen font together with
//                 a start offset and an exclusive end offset, both in UTF-16 code units.
//
// Nothing is appended for an empty string. Surrogate code points returned by U16_NEXT are
// treated as REPLACEMENT_CHARACTER. If no character required font support, the whole string
// becomes one run that uses the first family of the collection.
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
        vector<Run>* result) const {
    const uint32_t langListId = style.getLanguageListId();
    int variant = style.getVariant();
    // Family of the run currently being extended; nullptr until a family has been chosen.
    const FontFamily* lastFamily = nullptr;
    // Points at the last element of |result| once a run has been pushed by this call.
    Run* run = nullptr;

    if (string_size == 0) {
        return;
    }

    // Sentinel stored in nextCh once the input is exhausted.
    const uint32_t kEndOfString = 0xFFFFFFFF;

    // The loop works with one code point of lookahead: |ch| is the code point being processed
    // and |nextCh| the one after it, which is needed to detect a following variation selector.
    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    size_t nextUtf16Pos = 0;
    size_t readLength = 0;
    U16_NEXT(string, readLength, string_size, nextCh);
    if (U_IS_SURROGATE(nextCh)) {
        nextCh = REPLACEMENT_CHARACTER;
    }

    do {
        const uint32_t ch = nextCh;
        // Offset of |ch| in UTF-16 code units; nextUtf16Pos becomes the offset just past it.
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
            if (U_IS_SURROGATE(nextCh)) {
                nextCh = REPLACEMENT_CHARACTER;
            }
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (doesNotNeedFontSupport(ch)) {
            // Always continue if the character is a format character not needed to be in the font.
            shouldContinueRun = true;
        } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
            // Continue using existing font as long as it has coverage and is whitelisted.
            shouldContinueRun = lastFamily->getCoverage().get(ch);
        }

        if (!shouldContinueRun) {
            const std::shared_ptr<FontFamily>& family = getFamilyForChar(
                    ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
            // A new run starts at the beginning of the string or whenever the family changes.
            if (utf16Pos == 0 || family.get() != lastFamily) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                        (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
                        family != nullptr && family->getCoverage().get(prevCh)) {
                    const size_t prevChLength = U16_LENGTH(prevCh);
                    if (run != nullptr) {
                        // Take the previous character away from the current run and drop that
                        // run entirely if it becomes empty.
                        run->end -= prevChLength;
                        if (run->start == run->end) {
                            result->pop_back();
                        }
                    }
                    start -= prevChLength;
                }
                if (lastFamily == nullptr) {
                    // This is the first family ever assigned. We are either seeing the very first
                    // character (which means start would already be zero), or we have only seen
                    // characters that don't need any font support (which means we need to adjust
                    // start to be 0 to include those characters).
                    start = 0;
                }
                // The end offset is a placeholder here; it is set below on every iteration.
                result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
                run = &result->back();
                lastFamily = family.get();
            }
        }
        prevCh = ch;
        if (run != nullptr) {
            run->end = nextUtf16Pos;  // exclusive
        }
    } while (nextCh != kEndOfString);

    if (lastFamily == nullptr) {
        // No character needed any font support, so it doesn't really matter which font they end up
        // getting displayed in. We put the whole string in one run, using the first font.
        result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
    }
}
    471 
    472 FakedFont FontCollection::baseFontFaked(FontStyle style) {
    473     return mFamilies[0]->getClosestMatch(style);
    474 }
    475 
    476 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
    477         const std::vector<FontVariation>& variations) {
    478     if (variations.empty() || mSupportedAxes.empty()) {
    479         return nullptr;
    480     }
    481 
    482     bool hasSupportedAxis = false;
    483     for (const FontVariation& variation : variations) {
    484         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
    485             hasSupportedAxis = true;
    486             break;
    487         }
    488     }
    489     if (!hasSupportedAxis) {
    490         // None of variation axes are supported by this font collection.
    491         return nullptr;
    492     }
    493 
    494     std::vector<std::shared_ptr<FontFamily> > families;
    495     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
    496         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
    497         if (newFamily) {
    498             families.push_back(newFamily);
    499         } else {
    500             families.push_back(family);
    501         }
    502     }
    503 
    504     return std::shared_ptr<FontCollection>(new FontCollection(families));
    505 }
    506 
    507 uint32_t FontCollection::getId() const {
    508     return mId;
    509 }
    510 
    511 }  // namespace minikin
    512