/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Determine coverage of font given its raw "cmap" OpenType table

#define LOG_TAG "Minikin"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <vector>
using std::vector;

#include <log/log.h>

#include <minikin/CmapCoverage.h>
#include <minikin/SparseBitSet.h>
#include "MinikinInternal.h"

namespace minikin {

constexpr uint32_t U32MAX = std::numeric_limits<uint32_t>::max();

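// OpenType tables store multi-byte values in big-endian order; the helpers below read big-endian
// 16-, 24- and 32-bit values out of a raw table.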
// These could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
}

static uint32_t readU24(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
        ((uint32_t)data[offset + 2]);
}

static uint32_t readU32(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
        ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
}

// The start must be larger than or equal to coverage.back() if coverage is not empty.
// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) {
#ifdef VERBOSE_DEBUG
    ALOGD("adding range %d-%d\n", start, end);
#endif
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else if (coverage.back() == start) {
        coverage.back() = end;
        return true;
    } else {
        // Reject unordered range input since SparseBitSet assumes that the given range vector is
        // sorted. OpenType specification says cmap entries are sorted in order of code point
        // values, thus for OpenType compliant font files, we don't reach here.
        android_errorWriteLog(0x534e4554, "32178311");
        return false;
    }
}

struct Range {
    uint32_t start;  // inclusive
    uint32_t end;  // exclusive

    static Range InvalidRange() {
        return Range({ U32MAX, U32MAX });
    }

    inline bool isValid() const {
        return start != U32MAX && end != U32MAX;
    }

    // Returns true if left and right intersect.
    inline static bool intersects(const Range& left, const Range& right) {
        return left.isValid() && right.isValid() &&
                left.start < right.end && right.start < left.end;
    }

    // Returns the merged range. This method assumes that left and right are valid and intersect.
    static Range merge(const Range& left, const Range& right) {
        return Range({ std::min(left.start, right.start), std::max(left.end, right.end) });
    }
};

// Returns the Range at index i of the given ranges vector. Returns InvalidRange if i is out of
// range.
static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
    return i + 1 < r.size() ? Range({ r[i], r[i + 1] }) : Range::InvalidRange();
}

// Merge two sorted lists of ranges into one sorted list.
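// Ranges are flat vectors of [start, end) pairs. For example, merging { 1, 3, 5, 8 } with
// { 2, 6, 10, 11 } yields { 1, 8, 10, 11 }.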
static std::vector<uint32_t> mergeRanges(
        const std::vector<uint32_t>& lRanges, const std::vector<uint32_t>& rRanges) {
    std::vector<uint32_t> out;

    const size_t lsize = lRanges.size();
    const size_t rsize = rRanges.size();
    out.reserve(lsize + rsize);
    size_t ri = 0;
    size_t li = 0;
    while (li < lsize || ri < rsize) {
        Range left = getRange(lRanges, li);
        Range right = getRange(rRanges, ri);

        if (!right.isValid()) {
            // No ranges left in rRanges. Just append all remaining ranges from lRanges.
            do {
                Range r = getRange(lRanges, li);
                addRange(out, r.start, r.end);  // Input is sorted. Never returns false.
                li += 2;
            } while (li < lsize);
            break;
        } else if (!left.isValid()) {
            // No ranges left in lRanges. Just append all remaining ranges from rRanges.
            do {
                Range r = getRange(rRanges, ri);
                addRange(out, r.start, r.end);  // Input is sorted. Never returns false.
                ri += 2;
            } while (ri < rsize);
            break;
        } else if (!Range::intersects(left, right)) {
            // No intersection. Add the smaller range.
            if (left.start < right.start) {
                addRange(out, left.start, left.end);  // Input is sorted. Never returns false.
                li += 2;
            } else {
                addRange(out, right.start, right.end);  // Input is sorted. Never returns false.
                ri += 2;
            }
        } else {
            Range merged = Range::merge(left, right);
            li += 2;
            ri += 2;
            left = getRange(lRanges, li);
            right = getRange(rRanges, ri);
            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
                if (Range::intersects(merged, left)) {
                    merged = Range::merge(merged, left);
                    li += 2;
                    left = getRange(lRanges, li);
                } else {
                    merged = Range::merge(merged, right);
                    ri += 2;
                    right = getRange(rRanges, ri);
                }
            }
            addRange(out, merged.start, merged.end);  // Input is sorted. Never returns false.
        }
    }

    return out;
}

// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
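// A Format 4 subtable maps 16-bit code points through parallel segment arrays: segCountX2 is at
// offset 6, endCode[segCount] starts at offset 14, and startCode[], idDelta[] and idRangeOffset[]
// follow after a reserved pad, which is where the offset arithmetic below comes from.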
static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kSegCountOffset = 6;
    const size_t kEndCountOffset = 14;
    const size_t kHeaderSize = 16;
    const size_t kSegmentSize = 8;  // total size of array elements for one segment
    if (kEndCountOffset > size) {
        return false;
    }
    size_t segCount = readU16(data, kSegCountOffset) >> 1;
    if (kHeaderSize + segCount * kSegmentSize > size) {
        return false;
    }
    for (size_t i = 0; i < segCount; i++) {
        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
        if (end < start) {
            // invalid segment range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }
        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
        if (rangeOffset == 0) {
            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
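            // With rangeOffset == 0, code point j maps to glyph (j + delta) mod 65536, and glyph 0
            // means "not mapped". The fast path below is taken only when no code point in
            // [start, end] maps to glyph 0, so the whole segment can be added as a single range;
            // otherwise each code point is checked individually.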
            if (((end + delta) & 0xffff) > end - start) {
                if (!addRange(coverage, start, end + 1)) {
                    return false;
                }
            } else {
                for (uint32_t j = start; j < end + 1; j++) {
                    if (((j + delta) & 0xffff) != 0) {
                        if (!addRange(coverage, j, j + 1)) {
                            return false;
                        }
                    }
                }
            }
        } else {
            for (uint32_t j = start; j < end + 1; j++) {
                uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset +
                    (i + j - start) * 2;
                if (actualRangeOffset + 2 > size) {
                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
                    continue;
                }
                uint32_t glyphId = readU16(data, actualRangeOffset);
                if (glyphId != 0) {
                    if (!addRange(coverage, j, j + 1)) {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}

// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
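// A Format 12 subtable is a list of sequential map groups: numGroups is at offset 12 and each
// 12-byte group holds 32-bit startCharCode, endCharCode and startGlyphID values.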
static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kNGroupsOffset = 12;
    const size_t kFirstGroupOffset = 16;
    const size_t kGroupSize = 12;
    const size_t kStartCharCodeOffset = 0;
    const size_t kEndCharCodeOffset = 4;
    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
    if (kFirstGroupOffset > size) {
        return false;
    }
    uint32_t nGroups = readU32(data, kNGroupsOffset);
    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
        android_errorWriteLog(0x534e4554, "25645298");
        return false;
    }
    for (uint32_t i = 0; i < nGroups; i++) {
        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
        if (end < start) {
            // invalid group range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }

        // No need to read outside of Unicode code point range.
        if (start > MAX_UNICODE_CODE_POINT) {
            return true;
        }
        if (end > MAX_UNICODE_CODE_POINT) {
            // file is inclusive, vector is exclusive
            if (end == 0xFFFFFFFF) {
                android_errorWriteLog(0x534e4554, "62134807");
            }
            return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
        }
        if (!addRange(coverage, start, end + 1)) {  // file is inclusive, vector is exclusive
            return false;
        }
    }
    return true;
}

// Lower value has higher priority. 0 for the highest priority table.
// kLowestPriority for unsupported tables.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
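// For reference: platform 3 is Windows (encoding 10 is the full Unicode repertoire, encoding 1 is
// the BMP-only encoding) and platform 0 is the Unicode platform.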
constexpr uint8_t kLowestPriority = 255;
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    if (platformId == 3 && encodingId == 10) {
        return 0;
    }
    if (platformId == 0 && encodingId == 6) {
        return 1;
    }
    if (platformId == 0 && encodingId == 4) {
        return 2;
    }
    if (platformId == 3 && encodingId == 1) {
        return 3;
    }
    if (platformId == 0 && encodingId == 3) {
        return 4;
    }
    if (platformId == 0 && encodingId == 2) {
        return 5;
    }
    if (platformId == 0 && encodingId == 1) {
        return 6;
    }
    if (platformId == 0 && encodingId == 0) {
        return 7;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}

// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
// function assumes code points in both default UVS Table and non-default UVS table are stored in
// ascending order. This is required by the standard.
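// In a Format 14 subtable, a default UVS table is a list of 4-byte Unicode ranges (24-bit start
// code point plus an 8-bit additional count) and a non-default UVS table is a list of 5-byte UVS
// mappings (24-bit code point plus a 16-bit glyph ID); the header and record sizes below reflect
// this layout.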
static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
        uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
        const SparseBitSet& baseCoverage) {
    // Need to merge supported ranges from the default UVS Table and the non-default UVS Table.
    // First, collect all supported code points from the non-default UVS table.
    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
    if (nonDefaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUVSMappingRecordSize = 5;

        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > nonDefaultUVSTableOffset.
        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
        if (nonDefaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint32_t numRecords = readU32(nonDefaultUVSTable, 0);
        if (numRecords * kUVSMappingRecordSize + kHeaderSize > nonDefaultUVSTableRemaining) {
            return false;
        }
        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
            if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) {
                return false;
            }
        }
    }

    // Then, construct ranges from the default UVS Table; they will be merged with the code points
    // collected from the non-default UVS table below.
    std::vector<uint32_t> rangesFromDefaultUVSTable;
    if (defaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUnicodeRangeRecordSize = 4;

        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > defaultUVSTableOffset.
        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;

        if (defaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint32_t numRecords = readU32(defaultUVSTable, 0);
        if (numRecords * kUnicodeRangeRecordSize + kHeaderSize > defaultUVSTableRemaining) {
            return false;
        }

        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];

            // Then insert ranges from the default UVS Table, excluding code points whose base
            // glyph is not supported.
            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
                // All code points in the default UVS table map to the glyphs of the code points
                // without variation selectors, so check the default glyph availability and exclude
                // the code point if it is not supported by the default cmap table.
                if (baseCoverage.get(cp)) {
                    if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) {
                        return false;
                    }
                }
            }
        }
    }
    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
    return true;
}

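// Get per-variation-selector coverage out of a Format 14 (Unicode Variation Sequences) subtable.
// For each variation selector record, (*out)[getVsIndex(vs)] is set to the base code points that
// are supported with that selector; entries from a default UVS table are filtered against the
// coverage of the base cmap subtable.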
static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out,
        const uint8_t* data, size_t size, const SparseBitSet& baseCoverage) {
    constexpr size_t kHeaderSize = 10;
    constexpr size_t kRecordSize = 11;
    constexpr size_t kLengthOffset = 2;
    constexpr size_t kNumRecordOffset = 6;

    out->clear();
    if (size < kHeaderSize) {
        return;
    }

    const uint32_t length = readU32(data, kLengthOffset);
    if (size < length) {
        return;
    }

    uint32_t numRecords = readU32(data, kNumRecordOffset);
    if (numRecords == 0 || kHeaderSize + kRecordSize * numRecords > length) {
        return;
    }

    for (uint32_t i = 0; i < numRecords; ++i) {
        // Iterate the records in reverse order so that the largest variation selector index is
        // seen first; it determines the size of the output vector.
        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
            continue;
        }

        const uint16_t vsIndex = getVsIndex(vsCodePoint);
        if (vsIndex == INVALID_VS_INDEX) {
            continue;
        }
        std::vector<uint32_t> ranges;
        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
                baseCoverage)) {
            continue;
        }
        if (out->size() < vsIndex + 1) {
            out->resize(vsIndex + 1);
        }
        (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1));
    }

    out->shrink_to_fit();
}

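// Compute the coverage of a font from its raw "cmap" table. The best Unicode subtable (Format 4
// or Format 12) is selected with getTablePriority() and converted into the returned SparseBitSet.
// If a (platform 0, encoding 5) Format 14 subtable is present, per-variation-selector coverage is
// stored in *out.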
SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
        std::vector<std::unique_ptr<SparseBitSet>>* out) {
    constexpr size_t kHeaderSize = 4;
    constexpr size_t kNumTablesOffset = 2;
    constexpr size_t kTableSize = 8;
    constexpr size_t kPlatformIdOffset = 0;
    constexpr size_t kEncodingIdOffset = 2;
    constexpr size_t kOffsetOffset = 4;
    constexpr size_t kFormatOffset = 0;
    constexpr uint32_t kNoTable = UINT32_MAX;

    if (kHeaderSize > cmap_size) {
        return SparseBitSet();
    }
    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
    if (kHeaderSize + numTables * kTableSize > cmap_size) {
        return SparseBitSet();
    }

    uint32_t bestTableOffset = kNoTable;
    uint16_t bestTableFormat = 0;
    uint8_t bestTablePriority = kLowestPriority;
    uint32_t vsTableOffset = kNoTable;
    for (uint32_t i = 0; i < numTables; ++i) {
        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);

        if (offset > cmap_size - 2) {
            continue;  // Invalid table: not enough space to read.
        }
        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);

        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
            if (vsTableOffset == kNoTable && format == 14) {
                vsTableOffset = offset;
            } else {
                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
                // format we don't understand.
            }
        } else {
            uint32_t length;
            uint32_t language;

            if (format == 4) {
                constexpr size_t lengthOffset = 2;
                constexpr size_t languageOffset = 4;
                constexpr size_t minTableSize = languageOffset + 2;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU16(cmap_data, offset + lengthOffset);
                language = readU16(cmap_data, offset + languageOffset);
            } else if (format == 12) {
                constexpr size_t lengthOffset = 4;
                constexpr size_t languageOffset = 8;
                constexpr size_t minTableSize = languageOffset + 4;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU32(cmap_data, offset + lengthOffset);
                language = readU32(cmap_data, offset + languageOffset);
            } else {
                continue;
            }

            if (length > cmap_size - offset) {
                continue;  // Invalid table: table length is larger than whole cmap data size.
            }
            if (language != 0) {
                // Unsupported or invalid table: this is either a subtable for the Macintosh
                // platform (which we don't support), or an invalid subtable since language field
                // should be zero for non-Macintosh subtables.
                continue;
            }
            const uint8_t priority = getTablePriority(platformId, encodingId);
            if (priority < bestTablePriority) {
                bestTableOffset = offset;
                bestTablePriority = priority;
                bestTableFormat = format;
            }
        }
        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
            // Already found the highest priority table and variation sequences table. No need to
            // look at remaining tables.
            break;
        }
    }

    SparseBitSet coverage;

    if (bestTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + bestTableOffset;
        const size_t tableSize = cmap_size - bestTableOffset;
        bool success;
        vector<uint32_t> coverageVec;
        if (bestTableFormat == 4) {
            success = getCoverageFormat4(coverageVec, tableData, tableSize);
        } else {
            success = getCoverageFormat12(coverageVec, tableData, tableSize);
        }

        if (success) {
            coverage = SparseBitSet(coverageVec.data(), coverageVec.size() >> 1);
        }
    }

    if (vsTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + vsTableOffset;
        const size_t tableSize = cmap_size - vsTableOffset;
        getCoverageFormat14(out, tableData, tableSize, coverage);
    }
    return coverage;
}

}  // namespace minikin