Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef MINIKIN_FONT_LANGUAGE_H
     18 #define MINIKIN_FONT_LANGUAGE_H
     19 
     20 #include <string>
     21 #include <vector>
     22 
     23 #include <hb.h>
     24 
     25 namespace minikin {
     26 
     27 // Due to the limits in font fallback score calculation, we can't use anything more than 12
     28 // languages.
     29 const size_t FONT_LANGUAGES_LIMIT = 12;
     30 
     31 // The language or region code is encoded to 15 bits.
     32 const uint16_t INVALID_CODE = 0x7fff;
     33 
     34 class FontLanguages;
     35 
     36 // FontLanguage is a compact representation of a BCP 47 language tag. It
     37 // does not capture all possible information, only what directly affects
     38 // font rendering.
     39 struct FontLanguage {
     40 public:
     41     enum EmojiStyle : uint8_t {
     42         EMSTYLE_EMPTY = 0,
     43         EMSTYLE_DEFAULT = 1,
     44         EMSTYLE_EMOJI = 2,
     45         EMSTYLE_TEXT = 3,
     46     };
     47     // Default constructor creates the unsupported language.
     48     FontLanguage()
     49             : mScript(0ul),
     50             mLanguage(INVALID_CODE),
     51             mRegion(INVALID_CODE),
     52             mHbLanguage(HB_LANGUAGE_INVALID),
     53             mSubScriptBits(0ul),
     54             mEmojiStyle(EMSTYLE_EMPTY) {}
     55 
     56     // Parse from string
     57     FontLanguage(const char* buf, size_t length);
     58 
     59     bool operator==(const FontLanguage other) const {
     60         return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage &&
     61                 mRegion == other.mRegion && mEmojiStyle == other.mEmojiStyle;
     62     }
     63 
     64     bool operator!=(const FontLanguage other) const {
     65         return !(*this == other);
     66     }
     67 
     68     bool isUnsupported() const { return mLanguage == INVALID_CODE; }
     69     EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
     70     hb_language_t getHbLanguage() const { return mHbLanguage; }
     71 
     72 
     73     bool isEqualScript(const FontLanguage& other) const;
     74 
     75     // Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
     76     // ja-Hira doesn't support Jpan.
     77     bool supportsHbScript(hb_script_t script) const;
     78 
     79     std::string getString() const;
     80 
     81     // Calculates a matching score. This score represents how well the input languages cover this
     82     // language. The maximum score in the language list is returned.
     83     // 0 = no match, 1 = script match, 2 = script and primary language match.
     84     int calcScoreFor(const FontLanguages& supported) const;
     85 
     86     uint64_t getIdentifier() const {
     87         return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | ((uint64_t)mRegion << 2) |
     88                 mEmojiStyle;
     89     }
     90 
     91 private:
     92     friend class FontLanguages;  // for FontLanguages constructor
     93 
     94     // ISO 15924 compliant script code. The 4 chars script code are packed into a 32 bit integer.
     95     uint32_t mScript;
     96 
     97     // ISO 639-1 or ISO 639-2 compliant language code.
     98     // The two- or three-letter language code is packed into a 15 bit integer.
     99     // mLanguage = 0 means the FontLanguage is unsupported.
    100     uint16_t mLanguage;
    101 
    102     // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is
    103     // packed into a 15 bit integer.
    104     uint16_t mRegion;
    105 
    106     // The language to be passed HarfBuzz shaper.
    107     hb_language_t mHbLanguage;
    108 
    109     // For faster comparing, use 7 bits for specific scripts.
    110     static const uint8_t kBopomofoFlag = 1u;
    111     static const uint8_t kHanFlag = 1u << 1;
    112     static const uint8_t kHangulFlag = 1u << 2;
    113     static const uint8_t kHiraganaFlag = 1u << 3;
    114     static const uint8_t kKatakanaFlag = 1u << 4;
    115     static const uint8_t kSimplifiedChineseFlag = 1u << 5;
    116     static const uint8_t kTraditionalChineseFlag = 1u << 6;
    117     uint8_t mSubScriptBits;
    118 
    119     EmojiStyle mEmojiStyle;
    120 
    121     static uint8_t scriptToSubScriptBits(uint32_t script);
    122 
    123     static EmojiStyle resolveEmojiStyle(const char* buf, size_t length, uint32_t script);
    124 
    125     // Returns true if the provide subscript bits has the requested subscript bits.
    126     // Note that this function returns false if the requested subscript bits are empty.
    127     static bool supportsScript(uint8_t providedBits, uint8_t requestedBits);
    128 };
    129 
    130 // An immutable list of languages.
    131 class FontLanguages {
    132 public:
    133     explicit FontLanguages(std::vector<FontLanguage>&& languages);
    134     FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {}
    135     FontLanguages(FontLanguages&&) = default;
    136 
    137     size_t size() const { return mLanguages.size(); }
    138     bool empty() const { return mLanguages.empty(); }
    139     const FontLanguage& operator[] (size_t n) const { return mLanguages[n]; }
    140 
    141 private:
    142     friend struct FontLanguage;  // for calcScoreFor
    143 
    144     std::vector<FontLanguage> mLanguages;
    145     uint8_t mUnionOfSubScriptBits;
    146     bool mIsAllTheSameLanguage;
    147 
    148     uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; }
    149     bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; }
    150 
    151     // Do not copy and assign.
    152     FontLanguages(const FontLanguages&) = delete;
    153     void operator=(const FontLanguages&) = delete;
    154 };
    155 
    156 }  // namespace minikin
    157 
    158 #endif  // MINIKIN_FONT_LANGUAGE_H
    159