1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "cmap.h" 6 7 #include <algorithm> 8 #include <set> 9 #include <utility> 10 #include <vector> 11 12 #include "maxp.h" 13 #include "os2.h" 14 15 // cmap - Character To Glyph Index Mapping Table 16 // http://www.microsoft.com/opentype/otspec/cmap.htm 17 18 namespace { 19 20 struct CMAPSubtableHeader { 21 uint16_t platform; 22 uint16_t encoding; 23 uint32_t offset; 24 uint16_t format; 25 uint32_t length; 26 }; 27 28 struct Subtable314Range { 29 uint16_t start_range; 30 uint16_t end_range; 31 int16_t id_delta; 32 uint16_t id_range_offset; 33 uint32_t id_range_offset_offset; 34 }; 35 36 // The maximum number of groups in format 12, 13 or 14 subtables. 37 // Note: 0xFFFF is the maximum number of glyphs in a single font file. 38 const unsigned kMaxCMAPGroups = 0xFFFF; 39 40 // Glyph array size for the Mac Roman (format 0) table. 41 const size_t kFormat0ArraySize = 256; 42 43 // The upper limit of the Unicode code point. 44 const uint32_t kUnicodeUpperLimit = 0x10FFFF; 45 46 // The maximum number of UVS records (See below). 47 const uint32_t kMaxCMAPSelectorRecords = 259; 48 // The range of UVSes are: 49 // 0x180B-0x180D (3 code points) 50 // 0xFE00-0xFE0F (16 code points) 51 // 0xE0100-0xE01EF (240 code points) 52 const uint32_t kMongolianVSStart = 0x180B; 53 const uint32_t kMongolianVSEnd = 0x180D; 54 const uint32_t kVSStart = 0xFE00; 55 const uint32_t kVSEnd = 0xFE0F; 56 const uint32_t kIVSStart = 0xE0100; 57 const uint32_t kIVSEnd = 0xE01EF; 58 const uint32_t kUVSUpperLimit = 0xFFFFFF; 59 60 // Parses Format 4 tables 61 bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding, 62 const uint8_t *data, size_t length, uint16_t num_glyphs) { 63 ots::Buffer subtable(data, length); 64 65 // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the 66 // whole thing and recompacting it, we validate it and include it verbatim 67 // in the output. 68 69 if (!file->os2) { 70 return OTS_FAILURE(); 71 } 72 73 if (!subtable.Skip(4)) { 74 return OTS_FAILURE(); 75 } 76 uint16_t language = 0; 77 if (!subtable.ReadU16(&language)) { 78 return OTS_FAILURE(); 79 } 80 if (language) { 81 // Platform ID 3 (windows) subtables should have language '0'. 82 return OTS_FAILURE(); 83 } 84 85 uint16_t segcountx2, search_range, entry_selector, range_shift; 86 segcountx2 = search_range = entry_selector = range_shift = 0; 87 if (!subtable.ReadU16(&segcountx2) || 88 !subtable.ReadU16(&search_range) || 89 !subtable.ReadU16(&entry_selector) || 90 !subtable.ReadU16(&range_shift)) { 91 return OTS_FAILURE(); 92 } 93 94 if (segcountx2 & 1 || search_range & 1) { 95 return OTS_FAILURE(); 96 } 97 const uint16_t segcount = segcountx2 >> 1; 98 // There must be at least one segment according the spec. 99 if (segcount < 1) { 100 return OTS_FAILURE(); 101 } 102 103 // log2segcount is the maximal x s.t. 2^x < segcount 104 unsigned log2segcount = 0; 105 while (1u << (log2segcount + 1) <= segcount) { 106 log2segcount++; 107 } 108 109 const uint16_t expected_search_range = 2 * 1u << log2segcount; 110 if (expected_search_range != search_range) { 111 return OTS_FAILURE(); 112 } 113 114 if (entry_selector != log2segcount) { 115 return OTS_FAILURE(); 116 } 117 118 const uint16_t expected_range_shift = segcountx2 - search_range; 119 if (range_shift != expected_range_shift) { 120 return OTS_FAILURE(); 121 } 122 123 std::vector<Subtable314Range> ranges(segcount); 124 125 for (unsigned i = 0; i < segcount; ++i) { 126 if (!subtable.ReadU16(&ranges[i].end_range)) { 127 return OTS_FAILURE(); 128 } 129 } 130 131 uint16_t padding; 132 if (!subtable.ReadU16(&padding)) { 133 return OTS_FAILURE(); 134 } 135 if (padding) { 136 return OTS_FAILURE(); 137 } 138 139 for (unsigned i = 0; i < segcount; ++i) { 140 if (!subtable.ReadU16(&ranges[i].start_range)) { 141 return OTS_FAILURE(); 142 } 143 } 144 for (unsigned i = 0; i < segcount; ++i) { 145 if (!subtable.ReadS16(&ranges[i].id_delta)) { 146 return OTS_FAILURE(); 147 } 148 } 149 for (unsigned i = 0; i < segcount; ++i) { 150 ranges[i].id_range_offset_offset = subtable.offset(); 151 if (!subtable.ReadU16(&ranges[i].id_range_offset)) { 152 return OTS_FAILURE(); 153 } 154 155 if (ranges[i].id_range_offset & 1) { 156 // Some font generators seem to put 65535 on id_range_offset 157 // for 0xFFFF-0xFFFF range. 158 // (e.g., many fonts in http://www.princexml.com/fonts/) 159 if (i == segcount - 1u) { 160 OTS_WARNING("bad id_range_offset"); 161 ranges[i].id_range_offset = 0; 162 // The id_range_offset value in the transcoded font will not change 163 // since this table is not actually "transcoded" yet. 164 } else { 165 return OTS_FAILURE(); 166 } 167 } 168 } 169 170 // ranges must be ascending order, based on the end_code. Ranges may not 171 // overlap. 172 for (unsigned i = 1; i < segcount; ++i) { 173 if ((i == segcount - 1u) && 174 (ranges[i - 1].start_range == 0xffff) && 175 (ranges[i - 1].end_range == 0xffff) && 176 (ranges[i].start_range == 0xffff) && 177 (ranges[i].end_range == 0xffff)) { 178 // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators. 179 // We'll accept them as an exception. 180 OTS_WARNING("multiple 0xffff terminators found"); 181 continue; 182 } 183 184 // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have 185 // unsorted table... 186 if (ranges[i].end_range <= ranges[i - 1].end_range) { 187 return OTS_FAILURE(); 188 } 189 if (ranges[i].start_range <= ranges[i - 1].end_range) { 190 return OTS_FAILURE(); 191 } 192 193 // On many fonts, the value of {first, last}_char_index are incorrect. 194 // Fix them. 195 if (file->os2->first_char_index != 0xFFFF && 196 ranges[i].start_range != 0xFFFF && 197 file->os2->first_char_index > ranges[i].start_range) { 198 file->os2->first_char_index = ranges[i].start_range; 199 } 200 if (file->os2->last_char_index != 0xFFFF && 201 ranges[i].end_range != 0xFFFF && 202 file->os2->last_char_index < ranges[i].end_range) { 203 file->os2->last_char_index = ranges[i].end_range; 204 } 205 } 206 207 // The last range must end at 0xffff 208 if (ranges[segcount - 1].end_range != 0xffff) { 209 return OTS_FAILURE(); 210 } 211 212 // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of 213 // each code-point defined in the table and make sure that they are all valid 214 // glyphs and that we don't access anything out-of-bounds. 215 for (unsigned i = 0; i < segcount; ++i) { 216 for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) { 217 const uint16_t code_point = cp; 218 if (ranges[i].id_range_offset == 0) { 219 // this is explictly allowed to overflow in the spec 220 const uint16_t glyph = code_point + ranges[i].id_delta; 221 if (glyph >= num_glyphs) { 222 return OTS_FAILURE(); 223 } 224 } else { 225 const uint16_t range_delta = code_point - ranges[i].start_range; 226 // this might seem odd, but it's true. The offset is relative to the 227 // location of the offset value itself. 228 const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset + 229 ranges[i].id_range_offset + 230 range_delta * 2; 231 // We need to be able to access a 16-bit value from this offset 232 if (glyph_id_offset + 1 >= length) { 233 return OTS_FAILURE(); 234 } 235 uint16_t glyph; 236 memcpy(&glyph, data + glyph_id_offset, 2); 237 glyph = ntohs(glyph); 238 if (glyph >= num_glyphs) { 239 return OTS_FAILURE(); 240 } 241 } 242 } 243 } 244 245 // We accept the table. 246 // TODO(yusukes): transcode the subtable. 247 if (platform == 3 && encoding == 0) { 248 file->cmap->subtable_3_0_4_data = data; 249 file->cmap->subtable_3_0_4_length = length; 250 } else if (platform == 3 && encoding == 1) { 251 file->cmap->subtable_3_1_4_data = data; 252 file->cmap->subtable_3_1_4_length = length; 253 } else if (platform == 0 && encoding == 3) { 254 file->cmap->subtable_0_3_4_data = data; 255 file->cmap->subtable_0_3_4_length = length; 256 } else { 257 return OTS_FAILURE(); 258 } 259 260 return true; 261 } 262 263 bool Parse31012(ots::OpenTypeFile *file, 264 const uint8_t *data, size_t length, uint16_t num_glyphs) { 265 ots::Buffer subtable(data, length); 266 267 // Format 12 tables are simple. We parse these and fully serialise them 268 // later. 269 270 if (!subtable.Skip(8)) { 271 return OTS_FAILURE(); 272 } 273 uint32_t language = 0; 274 if (!subtable.ReadU32(&language)) { 275 return OTS_FAILURE(); 276 } 277 if (language) { 278 return OTS_FAILURE(); 279 } 280 281 uint32_t num_groups = 0; 282 if (!subtable.ReadU32(&num_groups)) { 283 return OTS_FAILURE(); 284 } 285 if (num_groups == 0 || num_groups > kMaxCMAPGroups) { 286 return OTS_FAILURE(); 287 } 288 289 std::vector<ots::OpenTypeCMAPSubtableRange> &groups 290 = file->cmap->subtable_3_10_12; 291 groups.resize(num_groups); 292 293 for (unsigned i = 0; i < num_groups; ++i) { 294 if (!subtable.ReadU32(&groups[i].start_range) || 295 !subtable.ReadU32(&groups[i].end_range) || 296 !subtable.ReadU32(&groups[i].start_glyph_id)) { 297 return OTS_FAILURE(); 298 } 299 300 if (groups[i].start_range > kUnicodeUpperLimit || 301 groups[i].end_range > kUnicodeUpperLimit || 302 groups[i].start_glyph_id > 0xFFFF) { 303 return OTS_FAILURE(); 304 } 305 306 // [0xD800, 0xDFFF] are surrogate code points. 307 if (groups[i].start_range >= 0xD800 && 308 groups[i].start_range <= 0xDFFF) { 309 return OTS_FAILURE(); 310 } 311 if (groups[i].end_range >= 0xD800 && 312 groups[i].end_range <= 0xDFFF) { 313 return OTS_FAILURE(); 314 } 315 if (groups[i].start_range < 0xD800 && 316 groups[i].end_range > 0xDFFF) { 317 return OTS_FAILURE(); 318 } 319 320 // We assert that the glyph value is within range. Because of the range 321 // limits, above, we don't need to worry about overflow. 322 if (groups[i].end_range < groups[i].start_range) { 323 return OTS_FAILURE(); 324 } 325 if ((groups[i].end_range - groups[i].start_range) + 326 groups[i].start_glyph_id > num_glyphs) { 327 return OTS_FAILURE(); 328 } 329 } 330 331 // the groups must be sorted by start code and may not overlap 332 for (unsigned i = 1; i < num_groups; ++i) { 333 if (groups[i].start_range <= groups[i - 1].start_range) { 334 return OTS_FAILURE(); 335 } 336 if (groups[i].start_range <= groups[i - 1].end_range) { 337 return OTS_FAILURE(); 338 } 339 } 340 341 return true; 342 } 343 344 bool Parse31013(ots::OpenTypeFile *file, 345 const uint8_t *data, size_t length, uint16_t num_glyphs) { 346 ots::Buffer subtable(data, length); 347 348 // Format 13 tables are simple. We parse these and fully serialise them 349 // later. 350 351 if (!subtable.Skip(8)) { 352 return OTS_FAILURE(); 353 } 354 uint16_t language = 0; 355 if (!subtable.ReadU16(&language)) { 356 return OTS_FAILURE(); 357 } 358 if (language) { 359 return OTS_FAILURE(); 360 } 361 362 uint32_t num_groups = 0; 363 if (!subtable.ReadU32(&num_groups)) { 364 return OTS_FAILURE(); 365 } 366 367 // We limit the number of groups in the same way as in 3.10.12 tables. See 368 // the comment there in 369 if (num_groups == 0 || num_groups > kMaxCMAPGroups) { 370 return OTS_FAILURE(); 371 } 372 373 std::vector<ots::OpenTypeCMAPSubtableRange> &groups 374 = file->cmap->subtable_3_10_13; 375 groups.resize(num_groups); 376 377 for (unsigned i = 0; i < num_groups; ++i) { 378 if (!subtable.ReadU32(&groups[i].start_range) || 379 !subtable.ReadU32(&groups[i].end_range) || 380 !subtable.ReadU32(&groups[i].start_glyph_id)) { 381 return OTS_FAILURE(); 382 } 383 384 // We conservatively limit all of the values to protect some parsers from 385 // overflows 386 if (groups[i].start_range > kUnicodeUpperLimit || 387 groups[i].end_range > kUnicodeUpperLimit || 388 groups[i].start_glyph_id > 0xFFFF) { 389 return OTS_FAILURE(); 390 } 391 392 if (groups[i].start_glyph_id >= num_glyphs) { 393 return OTS_FAILURE(); 394 } 395 } 396 397 // the groups must be sorted by start code and may not overlap 398 for (unsigned i = 1; i < num_groups; ++i) { 399 if (groups[i].start_range <= groups[i - 1].start_range) { 400 return OTS_FAILURE(); 401 } 402 if (groups[i].start_range <= groups[i - 1].end_range) { 403 return OTS_FAILURE(); 404 } 405 } 406 407 return true; 408 } 409 410 bool Parse0514(ots::OpenTypeFile *file, 411 const uint8_t *data, size_t length, uint16_t num_glyphs) { 412 // Unicode Variation Selector table 413 ots::Buffer subtable(data, length); 414 415 // Format 14 tables are simple. We parse these and fully serialise them 416 // later. 417 418 // Skip format (USHORT) and length (ULONG) 419 if (!subtable.Skip(6)) { 420 return OTS_FAILURE(); 421 } 422 423 uint32_t num_records = 0; 424 if (!subtable.ReadU32(&num_records)) { 425 return OTS_FAILURE(); 426 } 427 if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) { 428 return OTS_FAILURE(); 429 } 430 431 std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records 432 = file->cmap->subtable_0_5_14; 433 records.resize(num_records); 434 435 for (unsigned i = 0; i < num_records; ++i) { 436 if (!subtable.ReadU24(&records[i].var_selector) || 437 !subtable.ReadU32(&records[i].default_offset) || 438 !subtable.ReadU32(&records[i].non_default_offset)) { 439 return OTS_FAILURE(); 440 } 441 // Checks the value of variation selector 442 if (!((records[i].var_selector >= kMongolianVSStart && 443 records[i].var_selector <= kMongolianVSEnd) || 444 (records[i].var_selector >= kVSStart && 445 records[i].var_selector <= kVSEnd) || 446 (records[i].var_selector >= kIVSStart && 447 records[i].var_selector <= kIVSEnd))) { 448 return OTS_FAILURE(); 449 } 450 if (i > 0 && 451 records[i-1].var_selector >= records[i].var_selector) { 452 return OTS_FAILURE(); 453 } 454 455 // Checks offsets 456 if (!records[i].default_offset && !records[i].non_default_offset) { 457 return OTS_FAILURE(); 458 } 459 if (records[i].default_offset && 460 records[i].default_offset >= length) { 461 return OTS_FAILURE(); 462 } 463 if (records[i].non_default_offset && 464 records[i].non_default_offset >= length) { 465 return OTS_FAILURE(); 466 } 467 } 468 469 for (unsigned i = 0; i < num_records; ++i) { 470 // Checks default UVS table 471 if (records[i].default_offset) { 472 subtable.set_offset(records[i].default_offset); 473 uint32_t num_ranges = 0; 474 if (!subtable.ReadU32(&num_ranges)) { 475 return OTS_FAILURE(); 476 } 477 if (!num_ranges || num_ranges > kMaxCMAPGroups) { 478 return OTS_FAILURE(); 479 } 480 481 uint32_t last_unicode_value = 0; 482 std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges 483 = records[i].ranges; 484 ranges.resize(num_ranges); 485 486 for (unsigned j = 0; j < num_ranges; ++j) { 487 if (!subtable.ReadU24(&ranges[j].unicode_value) || 488 !subtable.ReadU8(&ranges[j].additional_count)) { 489 return OTS_FAILURE(); 490 } 491 const uint32_t check_value = 492 ranges[j].unicode_value + ranges[j].additional_count; 493 if (ranges[j].unicode_value == 0 || 494 ranges[j].unicode_value > kUnicodeUpperLimit || 495 check_value > kUVSUpperLimit || 496 (last_unicode_value && 497 ranges[j].unicode_value <= last_unicode_value)) { 498 return OTS_FAILURE(); 499 } 500 last_unicode_value = check_value; 501 } 502 } 503 504 // Checks non default UVS table 505 if (records[i].non_default_offset) { 506 subtable.set_offset(records[i].non_default_offset); 507 uint32_t num_mappings = 0; 508 if (!subtable.ReadU32(&num_mappings)) { 509 return OTS_FAILURE(); 510 } 511 if (!num_mappings || num_mappings > kMaxCMAPGroups) { 512 return OTS_FAILURE(); 513 } 514 515 uint32_t last_unicode_value = 0; 516 std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings 517 = records[i].mappings; 518 mappings.resize(num_mappings); 519 520 for (unsigned j = 0; j < num_mappings; ++j) { 521 if (!subtable.ReadU24(&mappings[j].unicode_value) || 522 !subtable.ReadU16(&mappings[j].glyph_id)) { 523 return OTS_FAILURE(); 524 } 525 if (mappings[j].glyph_id == 0 || 526 mappings[j].unicode_value == 0 || 527 mappings[j].unicode_value > kUnicodeUpperLimit || 528 (last_unicode_value && 529 mappings[j].unicode_value <= last_unicode_value)) { 530 return OTS_FAILURE(); 531 } 532 last_unicode_value = mappings[j].unicode_value; 533 } 534 } 535 } 536 537 if (subtable.offset() != length) { 538 return OTS_FAILURE(); 539 } 540 file->cmap->subtable_0_5_14_length = subtable.offset(); 541 return true; 542 } 543 544 bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) { 545 // Mac Roman table 546 ots::Buffer subtable(data, length); 547 548 if (!subtable.Skip(4)) { 549 return OTS_FAILURE(); 550 } 551 uint16_t language = 0; 552 if (!subtable.ReadU16(&language)) { 553 return OTS_FAILURE(); 554 } 555 if (language) { 556 // simsun.ttf has non-zero language id. 557 OTS_WARNING("language id should be zero: %u", language); 558 } 559 560 file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize); 561 for (size_t i = 0; i < kFormat0ArraySize; ++i) { 562 uint8_t glyph_id = 0; 563 if (!subtable.ReadU8(&glyph_id)) { 564 return OTS_FAILURE(); 565 } 566 file->cmap->subtable_1_0_0.push_back(glyph_id); 567 } 568 569 return true; 570 } 571 572 } // namespace 573 574 namespace ots { 575 576 bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) { 577 Buffer table(data, length); 578 file->cmap = new OpenTypeCMAP; 579 580 uint16_t version = 0; 581 uint16_t num_tables = 0; 582 if (!table.ReadU16(&version) || 583 !table.ReadU16(&num_tables)) { 584 return OTS_FAILURE(); 585 } 586 587 if (version != 0) { 588 return OTS_FAILURE(); 589 } 590 if (!num_tables) { 591 return OTS_FAILURE(); 592 } 593 594 std::vector<CMAPSubtableHeader> subtable_headers; 595 596 // read the subtable headers 597 subtable_headers.reserve(num_tables); 598 for (unsigned i = 0; i < num_tables; ++i) { 599 CMAPSubtableHeader subt; 600 601 if (!table.ReadU16(&subt.platform) || 602 !table.ReadU16(&subt.encoding) || 603 !table.ReadU32(&subt.offset)) { 604 return OTS_FAILURE(); 605 } 606 607 subtable_headers.push_back(subt); 608 } 609 610 const size_t data_offset = table.offset(); 611 612 // make sure that all the offsets are valid. 613 uint32_t last_id = 0; 614 for (unsigned i = 0; i < num_tables; ++i) { 615 if (subtable_headers[i].offset > 1024 * 1024 * 1024) { 616 return OTS_FAILURE(); 617 } 618 if (subtable_headers[i].offset < data_offset || 619 subtable_headers[i].offset >= length) { 620 return OTS_FAILURE(); 621 } 622 623 // check if the table is sorted first by platform ID, then by encoding ID. 624 uint32_t current_id 625 = (subtable_headers[i].platform << 16) + subtable_headers[i].encoding; 626 if ((i != 0) && (last_id >= current_id)) { 627 return OTS_FAILURE(); 628 } 629 last_id = current_id; 630 } 631 632 // the format of the table is the first couple of bytes in the table. The 633 // length of the table is stored in a format-specific way. 634 for (unsigned i = 0; i < num_tables; ++i) { 635 table.set_offset(subtable_headers[i].offset); 636 if (!table.ReadU16(&subtable_headers[i].format)) { 637 return OTS_FAILURE(); 638 } 639 640 uint16_t len = 0; 641 switch (subtable_headers[i].format) { 642 case 0: 643 case 4: 644 if (!table.ReadU16(&len)) { 645 return OTS_FAILURE(); 646 } 647 subtable_headers[i].length = len; 648 break; 649 case 12: 650 case 13: 651 if (!table.Skip(2)) { 652 return OTS_FAILURE(); 653 } 654 if (!table.ReadU32(&subtable_headers[i].length)) { 655 return OTS_FAILURE(); 656 } 657 break; 658 case 14: 659 if (!table.ReadU32(&subtable_headers[i].length)) { 660 return OTS_FAILURE(); 661 } 662 break; 663 default: 664 subtable_headers[i].length = 0; 665 break; 666 } 667 } 668 669 // Now, verify that all the lengths are sane 670 for (unsigned i = 0; i < num_tables; ++i) { 671 if (!subtable_headers[i].length) continue; 672 if (subtable_headers[i].length > 1024 * 1024 * 1024) { 673 return OTS_FAILURE(); 674 } 675 // We know that both the offset and length are < 1GB, so the following 676 // addition doesn't overflow 677 const uint32_t end_byte 678 = subtable_headers[i].offset + subtable_headers[i].length; 679 if (end_byte > length) { 680 return OTS_FAILURE(); 681 } 682 } 683 684 // check that the cmap subtables are not overlapping. 685 std::set<std::pair<uint32_t, uint32_t> > uniq_checker; 686 std::vector<std::pair<uint32_t, uint8_t> > overlap_checker; 687 for (unsigned i = 0; i < num_tables; ++i) { 688 const uint32_t end_byte 689 = subtable_headers[i].offset + subtable_headers[i].length; 690 691 if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset, 692 end_byte)).second) { 693 // Sometimes Unicode table and MS table share exactly the same data. 694 // We'll allow this. 695 continue; 696 } 697 overlap_checker.push_back( 698 std::make_pair(subtable_headers[i].offset, 699 static_cast<uint8_t>(1) /* start */)); 700 overlap_checker.push_back( 701 std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */)); 702 } 703 std::sort(overlap_checker.begin(), overlap_checker.end()); 704 int overlap_count = 0; 705 for (unsigned i = 0; i < overlap_checker.size(); ++i) { 706 overlap_count += (overlap_checker[i].second ? 1 : -1); 707 if (overlap_count > 1) { 708 return OTS_FAILURE(); 709 } 710 } 711 712 // we grab the number of glyphs in the file from the maxp table to make sure 713 // that the character map isn't referencing anything beyound this range. 714 if (!file->maxp) { 715 return OTS_FAILURE(); 716 } 717 const uint16_t num_glyphs = file->maxp->num_glyphs; 718 719 // We only support a subset of the possible character map tables. Microsoft 720 // 'strongly recommends' that everyone supports the Unicode BMP table with 721 // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables: 722 // Platform ID Encoding ID Format 723 // 0 0 4 (Unicode Default) 724 // 0 3 4 (Unicode BMP) 725 // 0 3 12 (Unicode UCS-4) 726 // 0 5 14 (Unicode Variation Sequences) 727 // 1 0 0 (Mac Roman) 728 // 3 0 4 (MS Symbol) 729 // 3 1 4 (MS Unicode BMP) 730 // 3 10 12 (MS Unicode UCS-4) 731 // 3 10 13 (MS UCS-4 Fallback mapping) 732 // 733 // Note: 734 // * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table 735 // also exists, the 0-0-4 table is ignored. 736 // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table. 737 // Some fonts which include 0-5-14 table seems to be required 0-3-4 738 // table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists. 739 // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also 740 // exists, the 0-3-12 table is ignored. 741 // 742 743 for (unsigned i = 0; i < num_tables; ++i) { 744 if (subtable_headers[i].platform == 0) { 745 // Unicode platform 746 747 if ((subtable_headers[i].encoding == 0) && 748 (subtable_headers[i].format == 4)) { 749 // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4 750 // table actually points to MS symbol data and thus should be parsed as 751 // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be 752 // recovered in ots_cmap_serialise(). 753 if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset, 754 subtable_headers[i].length, num_glyphs)) { 755 return OTS_FAILURE(); 756 } 757 } else if ((subtable_headers[i].encoding == 3) && 758 (subtable_headers[i].format == 4)) { 759 // parse and output the 0-3-4 table as 0-3-4 table. 760 if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset, 761 subtable_headers[i].length, num_glyphs)) { 762 return OTS_FAILURE(); 763 } 764 } else if ((subtable_headers[i].encoding == 3) && 765 (subtable_headers[i].format == 12)) { 766 // parse and output the 0-3-12 table as 3-10-12 table. 767 if (!Parse31012(file, data + subtable_headers[i].offset, 768 subtable_headers[i].length, num_glyphs)) { 769 return OTS_FAILURE(); 770 } 771 } else if ((subtable_headers[i].encoding == 5) && 772 (subtable_headers[i].format == 14)) { 773 if (!Parse0514(file, data + subtable_headers[i].offset, 774 subtable_headers[i].length, num_glyphs)) { 775 return OTS_FAILURE(); 776 } 777 } 778 } else if (subtable_headers[i].platform == 1) { 779 // Mac platform 780 781 if ((subtable_headers[i].encoding == 0) && 782 (subtable_headers[i].format == 0)) { 783 // parse and output the 1-0-0 table. 784 if (!Parse100(file, data + subtable_headers[i].offset, 785 subtable_headers[i].length)) { 786 return OTS_FAILURE(); 787 } 788 } 789 } else if (subtable_headers[i].platform == 3) { 790 // MS platform 791 792 switch (subtable_headers[i].encoding) { 793 case 0: 794 case 1: 795 if (subtable_headers[i].format == 4) { 796 // parse 3-0-4 or 3-1-4 table. 797 if (!ParseFormat4(file, subtable_headers[i].platform, 798 subtable_headers[i].encoding, 799 data + subtable_headers[i].offset, 800 subtable_headers[i].length, num_glyphs)) { 801 return OTS_FAILURE(); 802 } 803 } 804 break; 805 case 10: 806 if (subtable_headers[i].format == 12) { 807 file->cmap->subtable_3_10_12.clear(); 808 if (!Parse31012(file, data + subtable_headers[i].offset, 809 subtable_headers[i].length, num_glyphs)) { 810 return OTS_FAILURE(); 811 } 812 } else if (subtable_headers[i].format == 13) { 813 file->cmap->subtable_3_10_13.clear(); 814 if (!Parse31013(file, data + subtable_headers[i].offset, 815 subtable_headers[i].length, num_glyphs)) { 816 return OTS_FAILURE(); 817 } 818 } 819 break; 820 } 821 } 822 } 823 824 return true; 825 } 826 827 bool ots_cmap_should_serialise(OpenTypeFile *file) { 828 return file->cmap != NULL; 829 } 830 831 bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) { 832 const bool have_034 = file->cmap->subtable_0_3_4_data != NULL; 833 const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0; 834 const bool have_100 = file->cmap->subtable_1_0_0.size() != 0; 835 const bool have_304 = file->cmap->subtable_3_0_4_data != NULL; 836 // MS Symbol and MS Unicode tables should not co-exist. 837 // See the comment above in 0-0-4 parser. 838 const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data; 839 const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0; 840 const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0; 841 const unsigned num_subtables = static_cast<unsigned>(have_034) + 842 static_cast<unsigned>(have_0514) + 843 static_cast<unsigned>(have_100) + 844 static_cast<unsigned>(have_304) + 845 static_cast<unsigned>(have_314) + 846 static_cast<unsigned>(have_31012) + 847 static_cast<unsigned>(have_31013); 848 const off_t table_start = out->Tell(); 849 850 // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables 851 // (e.g., old fonts for Mac). We don't support them. 852 if (!have_304 && !have_314 && !have_034) { 853 return OTS_FAILURE(); 854 } 855 856 if (!out->WriteU16(0) || 857 !out->WriteU16(num_subtables)) { 858 return OTS_FAILURE(); 859 } 860 861 const off_t record_offset = out->Tell(); 862 if (!out->Pad(num_subtables * 8)) { 863 return OTS_FAILURE(); 864 } 865 866 const off_t offset_034 = out->Tell(); 867 if (have_034) { 868 if (!out->Write(file->cmap->subtable_0_3_4_data, 869 file->cmap->subtable_0_3_4_length)) { 870 return OTS_FAILURE(); 871 } 872 } 873 874 const off_t offset_0514 = out->Tell(); 875 if (have_0514) { 876 const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records 877 = file->cmap->subtable_0_5_14; 878 const unsigned num_records = records.size(); 879 if (!out->WriteU16(14) || 880 !out->WriteU32(file->cmap->subtable_0_5_14_length) || 881 !out->WriteU32(num_records)) { 882 return OTS_FAILURE(); 883 } 884 for (unsigned i = 0; i < num_records; ++i) { 885 if (!out->WriteU24(records[i].var_selector) || 886 !out->WriteU32(records[i].default_offset) || 887 !out->WriteU32(records[i].non_default_offset)) { 888 return OTS_FAILURE(); 889 } 890 } 891 for (unsigned i = 0; i < num_records; ++i) { 892 if (records[i].default_offset) { 893 const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges 894 = records[i].ranges; 895 const unsigned num_ranges = ranges.size(); 896 if (!out->Seek(records[i].default_offset + offset_0514) || 897 !out->WriteU32(num_ranges)) { 898 return OTS_FAILURE(); 899 } 900 for (unsigned j = 0; j < num_ranges; ++j) { 901 if (!out->WriteU24(ranges[j].unicode_value) || 902 !out->WriteU8(ranges[j].additional_count)) { 903 return OTS_FAILURE(); 904 } 905 } 906 } 907 if (records[i].non_default_offset) { 908 const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings 909 = records[i].mappings; 910 const unsigned num_mappings = mappings.size(); 911 if (!out->Seek(records[i].non_default_offset + offset_0514) || 912 !out->WriteU32(num_mappings)) { 913 return OTS_FAILURE(); 914 } 915 for (unsigned j = 0; j < num_mappings; ++j) { 916 if (!out->WriteU24(mappings[j].unicode_value) || 917 !out->WriteU16(mappings[j].glyph_id)) { 918 return OTS_FAILURE(); 919 } 920 } 921 } 922 } 923 } 924 925 const off_t offset_100 = out->Tell(); 926 if (have_100) { 927 if (!out->WriteU16(0) || // format 928 !out->WriteU16(6 + kFormat0ArraySize) || // length 929 !out->WriteU16(0)) { // language 930 return OTS_FAILURE(); 931 } 932 if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) { 933 return OTS_FAILURE(); 934 } 935 } 936 937 const off_t offset_304 = out->Tell(); 938 if (have_304) { 939 if (!out->Write(file->cmap->subtable_3_0_4_data, 940 file->cmap->subtable_3_0_4_length)) { 941 return OTS_FAILURE(); 942 } 943 } 944 945 const off_t offset_314 = out->Tell(); 946 if (have_314) { 947 if (!out->Write(file->cmap->subtable_3_1_4_data, 948 file->cmap->subtable_3_1_4_length)) { 949 return OTS_FAILURE(); 950 } 951 } 952 953 const off_t offset_31012 = out->Tell(); 954 if (have_31012) { 955 std::vector<OpenTypeCMAPSubtableRange> &groups 956 = file->cmap->subtable_3_10_12; 957 const unsigned num_groups = groups.size(); 958 if (!out->WriteU16(12) || 959 !out->WriteU16(0) || 960 !out->WriteU32(num_groups * 12 + 16) || 961 !out->WriteU32(0) || 962 !out->WriteU32(num_groups)) { 963 return OTS_FAILURE(); 964 } 965 966 for (unsigned i = 0; i < num_groups; ++i) { 967 if (!out->WriteU32(groups[i].start_range) || 968 !out->WriteU32(groups[i].end_range) || 969 !out->WriteU32(groups[i].start_glyph_id)) { 970 return OTS_FAILURE(); 971 } 972 } 973 } 974 975 const off_t offset_31013 = out->Tell(); 976 if (have_31013) { 977 std::vector<OpenTypeCMAPSubtableRange> &groups 978 = file->cmap->subtable_3_10_13; 979 const unsigned num_groups = groups.size(); 980 if (!out->WriteU16(13) || 981 !out->WriteU16(0) || 982 !out->WriteU32(num_groups * 12 + 14) || 983 !out->WriteU32(0) || 984 !out->WriteU32(num_groups)) { 985 return OTS_FAILURE(); 986 } 987 988 for (unsigned i = 0; i < num_groups; ++i) { 989 if (!out->WriteU32(groups[i].start_range) || 990 !out->WriteU32(groups[i].end_range) || 991 !out->WriteU32(groups[i].start_glyph_id)) { 992 return OTS_FAILURE(); 993 } 994 } 995 } 996 997 const off_t table_end = out->Tell(); 998 // We might have hanging bytes from the above's checksum which the OTSStream 999 // then merges into the table of offsets. 1000 OTSStream::ChecksumState saved_checksum = out->SaveChecksumState(); 1001 out->ResetChecksum(); 1002 1003 // Now seek back and write the table of offsets 1004 if (!out->Seek(record_offset)) { 1005 return OTS_FAILURE(); 1006 } 1007 1008 if (have_034) { 1009 if (!out->WriteU16(0) || 1010 !out->WriteU16(3) || 1011 !out->WriteU32(offset_034 - table_start)) { 1012 return OTS_FAILURE(); 1013 } 1014 } 1015 1016 if (have_0514) { 1017 if (!out->WriteU16(0) || 1018 !out->WriteU16(5) || 1019 !out->WriteU32(offset_0514 - table_start)) { 1020 return OTS_FAILURE(); 1021 } 1022 } 1023 1024 if (have_100) { 1025 if (!out->WriteU16(1) || 1026 !out->WriteU16(0) || 1027 !out->WriteU32(offset_100 - table_start)) { 1028 return OTS_FAILURE(); 1029 } 1030 } 1031 1032 if (have_304) { 1033 if (!out->WriteU16(3) || 1034 !out->WriteU16(0) || 1035 !out->WriteU32(offset_304 - table_start)) { 1036 return OTS_FAILURE(); 1037 } 1038 } 1039 1040 if (have_314) { 1041 if (!out->WriteU16(3) || 1042 !out->WriteU16(1) || 1043 !out->WriteU32(offset_314 - table_start)) { 1044 return OTS_FAILURE(); 1045 } 1046 } 1047 1048 if (have_31012) { 1049 if (!out->WriteU16(3) || 1050 !out->WriteU16(10) || 1051 !out->WriteU32(offset_31012 - table_start)) { 1052 return OTS_FAILURE(); 1053 } 1054 } 1055 1056 if (have_31013) { 1057 if (!out->WriteU16(3) || 1058 !out->WriteU16(10) || 1059 !out->WriteU32(offset_31013 - table_start)) { 1060 return OTS_FAILURE(); 1061 } 1062 } 1063 1064 if (!out->Seek(table_end)) { 1065 return OTS_FAILURE(); 1066 } 1067 out->RestoreChecksum(saved_checksum); 1068 1069 return true; 1070 } 1071 1072 void ots_cmap_free(OpenTypeFile *file) { 1073 delete file->cmap; 1074 } 1075 1076 } // namespace ots 1077