1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Determine coverage of font given its raw "cmap" OpenType table 18 19 #define LOG_TAG "Minikin" 20 21 #include <algorithm> 22 #include <vector> 23 using std::vector; 24 25 #include <log/log.h> 26 27 #include <minikin/SparseBitSet.h> 28 #include <minikin/CmapCoverage.h> 29 #include "MinikinInternal.h" 30 31 #include <MinikinInternal.h> 32 33 namespace minikin { 34 35 constexpr uint32_t U32MAX = std::numeric_limits<uint32_t>::max(); 36 37 // These could perhaps be optimized to use __builtin_bswap16 and friends. 38 static uint32_t readU16(const uint8_t* data, size_t offset) { 39 return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]); 40 } 41 42 static uint32_t readU24(const uint8_t* data, size_t offset) { 43 return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 | 44 ((uint32_t)data[offset + 2]); 45 } 46 47 static uint32_t readU32(const uint8_t* data, size_t offset) { 48 return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | 49 ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); 50 } 51 52 // The start must be larger than or equal to coverage.back() if coverage is not empty. 53 // Returns true if the range is appended. Otherwise returns false as an error. 54 static bool addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) { 55 #ifdef VERBOSE_DEBUG 56 ALOGD("adding range %d-%d\n", start, end); 57 #endif 58 if (coverage.empty() || coverage.back() < start) { 59 coverage.push_back(start); 60 coverage.push_back(end); 61 return true; 62 } else if (coverage.back() == start) { 63 coverage.back() = end; 64 return true; 65 } else { 66 // Reject unordered range input since SparseBitSet assumes that the given range vector is 67 // sorted. OpenType specification says cmap entries are sorted in order of code point 68 // values, thus for OpenType compliant font files, we don't reach here. 69 android_errorWriteLog(0x534e4554, "32178311"); 70 return false; 71 } 72 } 73 74 struct Range { 75 uint32_t start; // inclusive 76 uint32_t end; // exclusive 77 78 static Range InvalidRange() { 79 return Range({ U32MAX, U32MAX }); 80 } 81 82 inline bool isValid() const { 83 return start != U32MAX && end != U32MAX; 84 } 85 86 // Returns true if left and right intersect. 87 inline static bool intersects(const Range& left, const Range& right) { 88 return left.isValid() && right.isValid() && 89 left.start < right.end && right.start < left.end; 90 } 91 92 // Returns merged range. This method assumes left and right are not invalid ranges and they have 93 // an intersection. 94 static Range merge(const Range& left, const Range& right) { 95 return Range({ std::min(left.start, right.start), std::max(left.end, right.end) }); 96 } 97 }; 98 99 // Returns Range from given ranges vector. Returns InvalidRange if i is out of range. 100 static inline Range getRange(const std::vector<uint32_t>& r, size_t i) { 101 return i + 1 < r.size() ? Range({ r[i], r[i + 1] }) : Range::InvalidRange(); 102 } 103 104 // Merge two sorted lists of ranges into one sorted list. 105 static std::vector<uint32_t> mergeRanges( 106 const std::vector<uint32_t>& lRanges, const std::vector<uint32_t>& rRanges) { 107 std::vector<uint32_t> out; 108 109 const size_t lsize = lRanges.size(); 110 const size_t rsize = rRanges.size(); 111 out.reserve(lsize + rsize); 112 size_t ri = 0; 113 size_t li = 0; 114 while (li < lsize || ri < rsize) { 115 Range left = getRange(lRanges, li); 116 Range right = getRange(rRanges, ri); 117 118 if (!right.isValid()) { 119 // No ranges left in rRanges. Just put all remaining ranges in lRanges. 120 do { 121 Range r = getRange(lRanges, li); 122 addRange(out, r.start, r.end); // Input is sorted. Never returns false. 123 li += 2; 124 } while (li < lsize); 125 break; 126 } else if (!left.isValid()) { 127 // No ranges left in lRanges. Just put all remaining ranges in rRanges. 128 do { 129 Range r = getRange(rRanges, ri); 130 addRange(out, r.start, r.end); // Input is sorted. Never returns false. 131 ri += 2; 132 } while (ri < rsize); 133 break; 134 } else if (!Range::intersects(left, right)) { 135 // No intersection. Add smaller range. 136 if (left.start < right.start) { 137 addRange(out, left.start, left.end); // Input is sorted. Never returns false. 138 li += 2; 139 } else { 140 addRange(out, right.start, right.end); // Input is sorted. Never returns false. 141 ri += 2; 142 } 143 } else { 144 Range merged = Range::merge(left, right); 145 li += 2; 146 ri += 2; 147 left = getRange(lRanges, li); 148 right = getRange(rRanges, ri); 149 while (Range::intersects(merged, left) || Range::intersects(merged, right)) { 150 if (Range::intersects(merged, left)) { 151 merged = Range::merge(merged, left); 152 li += 2; 153 left = getRange(lRanges, li); 154 } else { 155 merged = Range::merge(merged, right); 156 ri += 2; 157 right = getRange(rRanges, ri); 158 } 159 } 160 addRange(out, merged.start, merged.end); // Input is sorted. Never returns false. 161 } 162 } 163 164 return out; 165 } 166 167 // Get the coverage information out of a Format 4 subtable, storing it in the coverage vector 168 static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) { 169 const size_t kSegCountOffset = 6; 170 const size_t kEndCountOffset = 14; 171 const size_t kHeaderSize = 16; 172 const size_t kSegmentSize = 8; // total size of array elements for one segment 173 if (kEndCountOffset > size) { 174 return false; 175 } 176 size_t segCount = readU16(data, kSegCountOffset) >> 1; 177 if (kHeaderSize + segCount * kSegmentSize > size) { 178 return false; 179 } 180 for (size_t i = 0; i < segCount; i++) { 181 uint32_t end = readU16(data, kEndCountOffset + 2 * i); 182 uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i)); 183 if (end < start) { 184 // invalid segment range: size must be positive 185 android_errorWriteLog(0x534e4554, "26413177"); 186 return false; 187 } 188 uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); 189 if (rangeOffset == 0) { 190 uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i)); 191 if (((end + delta) & 0xffff) > end - start) { 192 if (!addRange(coverage, start, end + 1)) { 193 return false; 194 } 195 } else { 196 for (uint32_t j = start; j < end + 1; j++) { 197 if (((j + delta) & 0xffff) != 0) { 198 if (!addRange(coverage, j, j + 1)) { 199 return false; 200 } 201 } 202 } 203 } 204 } else { 205 for (uint32_t j = start; j < end + 1; j++) { 206 uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset + 207 (i + j - start) * 2; 208 if (actualRangeOffset + 2 > size) { 209 // invalid rangeOffset is considered a "warning" by OpenType Sanitizer 210 continue; 211 } 212 uint32_t glyphId = readU16(data, actualRangeOffset); 213 if (glyphId != 0) { 214 if (!addRange(coverage, j, j + 1)) { 215 return false; 216 } 217 } 218 } 219 } 220 } 221 return true; 222 } 223 224 // Get the coverage information out of a Format 12 subtable, storing it in the coverage vector 225 static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data, size_t size) { 226 const size_t kNGroupsOffset = 12; 227 const size_t kFirstGroupOffset = 16; 228 const size_t kGroupSize = 12; 229 const size_t kStartCharCodeOffset = 0; 230 const size_t kEndCharCodeOffset = 4; 231 const size_t kMaxNGroups = 0xfffffff0 / kGroupSize; // protection against overflow 232 // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits. 233 if (kFirstGroupOffset > size) { 234 return false; 235 } 236 uint32_t nGroups = readU32(data, kNGroupsOffset); 237 if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) { 238 android_errorWriteLog(0x534e4554, "25645298"); 239 return false; 240 } 241 for (uint32_t i = 0; i < nGroups; i++) { 242 uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize; 243 uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset); 244 uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset); 245 if (end < start) { 246 // invalid group range: size must be positive 247 android_errorWriteLog(0x534e4554, "26413177"); 248 return false; 249 } 250 251 // No need to read outside of Unicode code point range. 252 if (start > MAX_UNICODE_CODE_POINT) { 253 return true; 254 } 255 if (end > MAX_UNICODE_CODE_POINT) { 256 // file is inclusive, vector is exclusive 257 if (end == 0xFFFFFFFF) { 258 android_errorWriteLog(0x534e4554, "62134807"); 259 } 260 return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1); 261 } 262 if (!addRange(coverage, start, end + 1)) { // file is inclusive, vector is exclusive 263 return false; 264 } 265 } 266 return true; 267 } 268 269 // Lower value has higher priority. 0 for the highest priority table. 270 // kLowestPriority for unsupported tables. 271 // This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it. 272 constexpr uint8_t kLowestPriority = 255; 273 uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) { 274 if (platformId == 3 && encodingId == 10) { 275 return 0; 276 } 277 if (platformId == 0 && encodingId == 6) { 278 return 1; 279 } 280 if (platformId == 0 && encodingId == 4) { 281 return 2; 282 } 283 if (platformId == 3 && encodingId == 1) { 284 return 3; 285 } 286 if (platformId == 0 && encodingId == 3) { 287 return 4; 288 } 289 if (platformId == 0 && encodingId == 2) { 290 return 5; 291 } 292 if (platformId == 0 && encodingId == 1) { 293 return 6; 294 } 295 if (platformId == 0 && encodingId == 0) { 296 return 7; 297 } 298 // Tables other than above are not supported. 299 return kLowestPriority; 300 } 301 302 // Get merged coverage information from default UVS Table and non-default UVS Table. Note that this 303 // function assumes code points in both default UVS Table and non-default UVS table are stored in 304 // ascending order. This is required by the standard. 305 static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size, 306 uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset, 307 const SparseBitSet& baseCoverage) { 308 // Need to merge supported ranges from default UVS Table and non-default UVS Table. 309 // First, collect all supported code points from non default UVS table. 310 std::vector<uint32_t> rangesFromNonDefaultUVSTable; 311 if (nonDefaultUVSTableOffset != 0) { 312 constexpr size_t kHeaderSize = 4; 313 constexpr size_t kUVSMappingRecordSize = 5; 314 315 const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset; 316 // This subtraction doesn't underflow since the caller already checked 317 // size > nonDefaultUVSTableOffset. 318 const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset; 319 if (nonDefaultUVSTableRemaining < kHeaderSize) { 320 return false; 321 } 322 const uint32_t numRecords = readU32(nonDefaultUVSTable, 0); 323 if (numRecords * kUVSMappingRecordSize + kHeaderSize > nonDefaultUVSTableRemaining) { 324 return false; 325 } 326 for (uint32_t i = 0; i < numRecords; ++i) { 327 const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i; 328 const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset); 329 if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) { 330 return false; 331 } 332 } 333 } 334 335 // Then, construct range from default UVS Table with merging code points from non default UVS 336 // table. 337 std::vector<uint32_t> rangesFromDefaultUVSTable; 338 if (defaultUVSTableOffset != 0) { 339 constexpr size_t kHeaderSize = 4; 340 constexpr size_t kUnicodeRangeRecordSize = 4; 341 342 const uint8_t* defaultUVSTable = data + defaultUVSTableOffset; 343 // This subtraction doesn't underflow since the caller already checked 344 // size > defaultUVSTableOffset. 345 const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset; 346 347 if (defaultUVSTableRemaining < kHeaderSize) { 348 return false; 349 } 350 const uint32_t numRecords = readU32(defaultUVSTable, 0); 351 if (numRecords * kUnicodeRangeRecordSize + kHeaderSize > defaultUVSTableRemaining) { 352 return false; 353 } 354 355 for (uint32_t i = 0; i < numRecords; ++i) { 356 const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i; 357 const uint32_t startCp = readU24(defaultUVSTable, recordOffset); 358 const uint8_t rangeLength = defaultUVSTable[recordOffset + 3]; 359 360 // Then insert range from default UVS Table, but exclude if the base codepoint is not 361 // supported. 362 for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) { 363 // All codepoints in default UVS table should go to the glyphs of the codepoints 364 // without variation selectors. We need to check the default glyph availability and 365 // exclude the codepoint if it is not supported by defualt cmap table. 366 if (baseCoverage.get(cp)) { 367 if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) { 368 return false; 369 } 370 } 371 } 372 } 373 } 374 *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable); 375 return true; 376 } 377 378 static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out, 379 const uint8_t* data, size_t size, const SparseBitSet& baseCoverage) { 380 constexpr size_t kHeaderSize = 10; 381 constexpr size_t kRecordSize = 11; 382 constexpr size_t kLengthOffset = 2; 383 constexpr size_t kNumRecordOffset = 6; 384 385 out->clear(); 386 if (size < kHeaderSize) { 387 return; 388 } 389 390 const uint32_t length = readU32(data, kLengthOffset); 391 if (size < length) { 392 return; 393 } 394 395 uint32_t numRecords = readU32(data, kNumRecordOffset); 396 if (numRecords == 0 || kHeaderSize + kRecordSize * numRecords > length) { 397 return; 398 } 399 400 for (uint32_t i = 0; i < numRecords; ++i) { 401 // Insert from the largest code points since it determines the size of the output vector. 402 const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1); 403 const uint32_t vsCodePoint = readU24(data, recordHeadOffset); 404 const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3); 405 const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7); 406 if (defaultUVSOffset > length || nonDefaultUVSOffset > length) { 407 continue; 408 } 409 410 const uint16_t vsIndex = getVsIndex(vsCodePoint); 411 if (vsIndex == INVALID_VS_INDEX) { 412 continue; 413 } 414 std::vector<uint32_t> ranges; 415 if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset, 416 baseCoverage)) { 417 continue; 418 } 419 if (out->size() < vsIndex + 1) { 420 out->resize(vsIndex + 1); 421 } 422 (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1)); 423 } 424 425 out->shrink_to_fit(); 426 } 427 428 SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size, 429 std::vector<std::unique_ptr<SparseBitSet>>* out) { 430 constexpr size_t kHeaderSize = 4; 431 constexpr size_t kNumTablesOffset = 2; 432 constexpr size_t kTableSize = 8; 433 constexpr size_t kPlatformIdOffset = 0; 434 constexpr size_t kEncodingIdOffset = 2; 435 constexpr size_t kOffsetOffset = 4; 436 constexpr size_t kFormatOffset = 0; 437 constexpr uint32_t kNoTable = UINT32_MAX; 438 439 if (kHeaderSize > cmap_size) { 440 return SparseBitSet(); 441 } 442 uint32_t numTables = readU16(cmap_data, kNumTablesOffset); 443 if (kHeaderSize + numTables * kTableSize > cmap_size) { 444 return SparseBitSet(); 445 } 446 447 uint32_t bestTableOffset = kNoTable; 448 uint16_t bestTableFormat = 0; 449 uint8_t bestTablePriority = kLowestPriority; 450 uint32_t vsTableOffset = kNoTable; 451 for (uint32_t i = 0; i < numTables; ++i) { 452 const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize; 453 const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset); 454 const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset); 455 const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset); 456 457 if (offset > cmap_size - 2) { 458 continue; // Invalid table: not enough space to read. 459 } 460 const uint16_t format = readU16(cmap_data, offset + kFormatOffset); 461 462 if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) { 463 if (vsTableOffset == kNoTable && format == 14) { 464 vsTableOffset = offset; 465 } else { 466 // Ignore the (0, 5) table if we have already seen another valid one or it's in a 467 // format we don't understand. 468 } 469 } else { 470 uint32_t length; 471 uint32_t language; 472 473 if (format == 4) { 474 constexpr size_t lengthOffset = 2; 475 constexpr size_t languageOffset = 4; 476 constexpr size_t minTableSize = languageOffset + 2; 477 if (offset > cmap_size - minTableSize) { 478 continue; // Invalid table: not enough space to read. 479 } 480 length = readU16(cmap_data, offset + lengthOffset); 481 language = readU16(cmap_data, offset + languageOffset); 482 } else if (format == 12) { 483 constexpr size_t lengthOffset = 4; 484 constexpr size_t languageOffset = 8; 485 constexpr size_t minTableSize = languageOffset + 4; 486 if (offset > cmap_size - minTableSize) { 487 continue; // Invalid table: not enough space to read. 488 } 489 length = readU32(cmap_data, offset + lengthOffset); 490 language = readU32(cmap_data, offset + languageOffset); 491 } else { 492 continue; 493 } 494 495 if (length > cmap_size - offset) { 496 continue; // Invalid table: table length is larger than whole cmap data size. 497 } 498 if (language != 0) { 499 // Unsupported or invalid table: this is either a subtable for the Macintosh 500 // platform (which we don't support), or an invalid subtable since language field 501 // should be zero for non-Macintosh subtables. 502 continue; 503 } 504 const uint8_t priority = getTablePriority(platformId, encodingId); 505 if (priority < bestTablePriority) { 506 bestTableOffset = offset; 507 bestTablePriority = priority; 508 bestTableFormat = format; 509 } 510 } 511 if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) { 512 // Already found the highest priority table and variation sequences table. No need to 513 // look at remaining tables. 514 break; 515 } 516 } 517 518 SparseBitSet coverage; 519 520 if (bestTableOffset != kNoTable) { 521 const uint8_t* tableData = cmap_data + bestTableOffset; 522 const size_t tableSize = cmap_size - bestTableOffset; 523 bool success; 524 vector<uint32_t> coverageVec; 525 if (bestTableFormat == 4) { 526 success = getCoverageFormat4(coverageVec, tableData, tableSize); 527 } else { 528 success = getCoverageFormat12(coverageVec, tableData, tableSize); 529 } 530 531 if (success) { 532 coverage = SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1); 533 } 534 } 535 536 if (vsTableOffset != kNoTable) { 537 const uint8_t* tableData = cmap_data + vsTableOffset; 538 const size_t tableSize = cmap_size - vsTableOffset; 539 getCoverageFormat14(out, tableData, tableSize, coverage); 540 } 541 return coverage; 542 } 543 544 } // namespace minikin 545