Home | History | Annotate | Download | only in minikin
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef MINIKIN_LOCALE_LIST_H
     18 #define MINIKIN_LOCALE_LIST_H
     19 
     20 #include <string>
     21 #include <vector>
     22 
     23 #include <hb.h>
     24 
     25 #include "StringPiece.h"
     26 
     27 namespace minikin {
     28 
     29 // Due to the limits in font fallback score calculation, we can't use anything more than 12 locales.
     30 const size_t FONT_LOCALE_LIMIT = 12;
     31 
     32 // The language or region code is encoded to 15 bits.
     33 constexpr uint16_t NO_LANGUAGE = 0x7fff;
     34 constexpr uint16_t NO_REGION = 0x7fff;
     35 // The script code is encoded to 20 bits.
     36 constexpr uint32_t NO_SCRIPT = 0xfffff;
     37 
     38 class LocaleList;
     39 
     40 // Enum for making sub-locale from FontLangauge.
     41 enum class SubtagBits : uint8_t {
     42     EMPTY = 0b00000000,
     43     LANGUAGE = 0b00000001,
     44     SCRIPT = 0b00000010,
     45     REGION = 0b00000100,
     46     VARIANT = 0b00001000,
     47     EMOJI = 0b00010000,
     48     ALL = 0b00011111,
     49 };
     50 
     51 inline constexpr SubtagBits operator&(SubtagBits l, SubtagBits r) {
     52     return static_cast<SubtagBits>(static_cast<uint8_t>(l) & static_cast<uint8_t>(r));
     53 }
     54 inline constexpr SubtagBits operator|(SubtagBits l, SubtagBits r) {
     55     return static_cast<SubtagBits>(static_cast<uint8_t>(l) | static_cast<uint8_t>(r));
     56 }
     57 
     58 // Enum for emoji style.
     59 enum class EmojiStyle : uint8_t {
     60     EMPTY = 0,    // No emoji style is specified.
     61     DEFAULT = 1,  // Default emoji style is specified.
     62     EMOJI = 2,    // Emoji (color) emoji style is specified.
     63     TEXT = 3,     // Text (black/white) emoji style is specified.
     64 };
     65 
     66 // Locale is a compact representation of a BCP 47 language tag.
     67 // It does not capture all possible information, only what directly affects text layout:
     68 // font rendering, hyphenation, word breaking, etc.
     69 struct Locale {
     70 public:
     71     enum class Variant : uint16_t {  // Up to 12 bits
     72         NO_VARIANT = 0x0000,
     73         GERMAN_1901_ORTHOGRAPHY = 0x0001,
     74         GERMAN_1996_ORTHOGRAPHY = 0x0002,
     75     };
     76 
     77     // Default constructor creates the unsupported locale.
     78     Locale()
     79             : mScript(NO_SCRIPT),
     80               mLanguage(NO_LANGUAGE),
     81               mRegion(NO_REGION),
     82               mSubScriptBits(0ul),
     83               mVariant(Variant::NO_VARIANT),
     84               mEmojiStyle(EmojiStyle::EMPTY) {}
     85 
     86     // Parse from string
     87     Locale(const StringPiece& buf);
     88 
     89     bool operator==(const Locale other) const {
     90         return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage &&
     91                mRegion == other.mRegion && mVariant == other.mVariant &&
     92                mEmojiStyle == other.mEmojiStyle;
     93     }
     94 
     95     bool operator!=(const Locale other) const { return !(*this == other); }
     96 
     97     inline bool hasLanguage() const { return mLanguage != NO_LANGUAGE; }
     98     inline bool hasScript() const { return mScript != NO_SCRIPT; }
     99     inline bool hasRegion() const { return mRegion != NO_REGION; }
    100     inline bool hasVariant() const { return mVariant != Variant::NO_VARIANT; }
    101     inline bool hasEmojiStyle() const { return mEmojiStyle != EmojiStyle::EMPTY; }
    102 
    103     inline bool isSupported() const {
    104         return hasLanguage() || hasScript() || hasRegion() || hasVariant() || hasEmojiStyle();
    105     }
    106 
    107     inline bool isUnsupported() const { return !isSupported(); }
    108 
    109     EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
    110 
    111     bool isEqualScript(const Locale& other) const;
    112 
    113     // Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
    114     // ja-Hira doesn't support Jpan.
    115     bool supportsHbScript(hb_script_t script) const;
    116 
    117     std::string getString() const;
    118 
    119     // Calculates a matching score. This score represents how well the input locales cover this
    120     // locale. The maximum score in the locale list is returned.
    121     // 0 = no match, 1 = script match, 2 = script and primary language match.
    122     int calcScoreFor(const LocaleList& supported) const;
    123 
    124     uint64_t getIdentifier() const {
    125         return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 29) | ((uint64_t)mRegion << 14) |
    126                ((uint64_t)mEmojiStyle << 12) | (uint64_t)mVariant;
    127     }
    128 
    129     Locale getPartialLocale(SubtagBits bits) const;
    130 
    131 private:
    132     friend class LocaleList;  // for LocaleList constructor
    133 
    134     // ISO 15924 compliant script code. The 4 chars script code are packed into a 20 bit integer.
    135     // If not specified, this is kInvalidScript.
    136     uint32_t mScript;
    137 
    138     // ISO 639-1 or ISO 639-2 compliant language code.
    139     // The two- or three-letter language code is packed into a 15 bit integer.
    140     // mLanguage = 0 means the Locale is unsupported.
    141     uint16_t mLanguage;
    142 
    143     // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is
    144     // packed into a 15 bit integer.
    145     uint16_t mRegion;
    146 
    147     // For faster comparing, use 7 bits for specific scripts.
    148     static const uint8_t kBopomofoFlag = 1u;
    149     static const uint8_t kHanFlag = 1u << 1;
    150     static const uint8_t kHangulFlag = 1u << 2;
    151     static const uint8_t kHiraganaFlag = 1u << 3;
    152     static const uint8_t kKatakanaFlag = 1u << 4;
    153     static const uint8_t kSimplifiedChineseFlag = 1u << 5;
    154     static const uint8_t kTraditionalChineseFlag = 1u << 6;
    155     uint8_t mSubScriptBits;
    156 
    157     Variant mVariant;
    158 
    159     EmojiStyle mEmojiStyle;
    160 
    161     static uint8_t scriptToSubScriptBits(uint32_t rawScript);
    162 
    163     static EmojiStyle resolveEmojiStyle(const char* buf, size_t length);
    164     static EmojiStyle scriptToEmojiStyle(uint32_t script);
    165 
    166     // Returns true if the provide subscript bits has the requested subscript bits.
    167     // Note that this function returns false if the requested subscript bits are empty.
    168     static bool supportsScript(uint8_t providedBits, uint8_t requestedBits);
    169 };
    170 
    171 // An immutable list of locale.
    172 class LocaleList {
    173 public:
    174     explicit LocaleList(std::vector<Locale>&& locales);
    175     LocaleList()
    176             : mUnionOfSubScriptBits(0),
    177               mIsAllTheSameLocale(false),
    178               mEmojiStyle(EmojiStyle::EMPTY) {}
    179     LocaleList(LocaleList&&) = default;
    180 
    181     size_t size() const { return mLocales.size(); }
    182     bool empty() const { return mLocales.empty(); }
    183     const Locale& operator[](size_t n) const { return mLocales[n]; }
    184 
    185     hb_language_t getHbLanguage(size_t n) const { return mHbLangs[n]; }
    186 
    187     // Returns an effective emoji style of this locale list.
    188     // The effective means the first non empty emoji style in the list.
    189     EmojiStyle getEmojiStyle() const { return mEmojiStyle; }
    190 
    191 private:
    192     friend struct Locale;  // for calcScoreFor
    193 
    194     std::vector<Locale> mLocales;
    195 
    196     // The languages to be passed to HarfBuzz shaper.
    197     std::vector<hb_language_t> mHbLangs;
    198     uint8_t mUnionOfSubScriptBits;
    199     bool mIsAllTheSameLocale;
    200     EmojiStyle mEmojiStyle;
    201 
    202     uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; }
    203     bool isAllTheSameLocale() const { return mIsAllTheSameLocale; }
    204 
    205     // Do not copy and assign.
    206     LocaleList(const LocaleList&) = delete;
    207     void operator=(const LocaleList&) = delete;
    208 };
    209 
    210 }  // namespace minikin
    211 
    212 #endif  // MINIKIN_LOCALE_LIST_H
    213