Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "cmap.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 #include <utility>
     10 #include <vector>
     11 
     12 #include "maxp.h"
     13 #include "os2.h"
     14 
     15 // cmap - Character To Glyph Index Mapping Table
     16 // http://www.microsoft.com/opentype/otspec/cmap.htm
     17 
     18 namespace {
     19 
     20 struct CMAPSubtableHeader {
     21   uint16_t platform;
     22   uint16_t encoding;
     23   uint32_t offset;
     24   uint16_t format;
     25   uint32_t length;
     26 };
     27 
// One segment of a format 4 subtable, as collected by ParseFormat4().
struct Subtable314Range {
  // First code point covered by the segment.
  uint16_t start_range;
  // Last code point covered by the segment (inclusive).
  uint16_t end_range;
  // Signed delta added to the code point when id_range_offset is zero.
  int16_t id_delta;
  // Offset to the glyph index array entry, or zero when id_delta is used.
  uint16_t id_range_offset;
  // Position, within the subtable, of the id_range_offset field itself.
  // Glyph index lookups are computed relative to this position.
  uint32_t id_range_offset_offset;
};
     35 
     36 // The maximum number of groups in format 12, 13 or 14 subtables.
     37 // Note: 0xFFFF is the maximum number of glyphs in a single font file.
     38 const unsigned kMaxCMAPGroups = 0xFFFF;
     39 
     40 // Glyph array size for the Mac Roman (format 0) table.
     41 const size_t kFormat0ArraySize = 256;
     42 
     43 // The upper limit of the Unicode code point.
     44 const uint32_t kUnicodeUpperLimit = 0x10FFFF;
     45 
     46 // The maximum number of UVS records (See below).
     47 const uint32_t kMaxCMAPSelectorRecords = 259;
     48 // The range of UVSes are:
     49 //   0x180B-0x180D (3 code points)
     50 //   0xFE00-0xFE0F (16 code points)
     51 //   0xE0100-0xE01EF (240 code points)
     52 const uint32_t kMongolianVSStart = 0x180B;
     53 const uint32_t kMongolianVSEnd = 0x180D;
     54 const uint32_t kVSStart = 0xFE00;
     55 const uint32_t kVSEnd = 0xFE0F;
     56 const uint32_t kIVSStart = 0xE0100;
     57 const uint32_t kIVSEnd = 0xE01EF;
     58 const uint32_t kUVSUpperLimit = 0xFFFFFF;
     59 
     60 // Parses Format 4 tables
     61 bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
     62               const uint8_t *data, size_t length, uint16_t num_glyphs) {
     63   ots::Buffer subtable(data, length);
     64 
     65   // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
     66   // whole thing and recompacting it, we validate it and include it verbatim
     67   // in the output.
     68 
     69   if (!file->os2) {
     70     return OTS_FAILURE();
     71   }
     72 
     73   if (!subtable.Skip(4)) {
     74     return OTS_FAILURE();
     75   }
     76   uint16_t language = 0;
     77   if (!subtable.ReadU16(&language)) {
     78     return OTS_FAILURE();
     79   }
     80   if (language) {
     81     // Platform ID 3 (windows) subtables should have language '0'.
     82     return OTS_FAILURE();
     83   }
     84 
     85   uint16_t segcountx2, search_range, entry_selector, range_shift;
     86   segcountx2 = search_range = entry_selector = range_shift = 0;
     87   if (!subtable.ReadU16(&segcountx2) ||
     88       !subtable.ReadU16(&search_range) ||
     89       !subtable.ReadU16(&entry_selector) ||
     90       !subtable.ReadU16(&range_shift)) {
     91     return OTS_FAILURE();
     92   }
     93 
     94   if (segcountx2 & 1 || search_range & 1) {
     95     return OTS_FAILURE();
     96   }
     97   const uint16_t segcount = segcountx2 >> 1;
     98   // There must be at least one segment according the spec.
     99   if (segcount < 1) {
    100     return OTS_FAILURE();
    101   }
    102 
    103   // log2segcount is the maximal x s.t. 2^x < segcount
    104   unsigned log2segcount = 0;
    105   while (1u << (log2segcount + 1) <= segcount) {
    106     log2segcount++;
    107   }
    108 
    109   const uint16_t expected_search_range = 2 * 1u << log2segcount;
    110   if (expected_search_range != search_range) {
    111     return OTS_FAILURE();
    112   }
    113 
    114   if (entry_selector != log2segcount) {
    115     return OTS_FAILURE();
    116   }
    117 
    118   const uint16_t expected_range_shift = segcountx2 - search_range;
    119   if (range_shift != expected_range_shift) {
    120     return OTS_FAILURE();
    121   }
    122 
    123   std::vector<Subtable314Range> ranges(segcount);
    124 
    125   for (unsigned i = 0; i < segcount; ++i) {
    126     if (!subtable.ReadU16(&ranges[i].end_range)) {
    127       return OTS_FAILURE();
    128     }
    129   }
    130 
    131   uint16_t padding;
    132   if (!subtable.ReadU16(&padding)) {
    133     return OTS_FAILURE();
    134   }
    135   if (padding) {
    136     return OTS_FAILURE();
    137   }
    138 
    139   for (unsigned i = 0; i < segcount; ++i) {
    140     if (!subtable.ReadU16(&ranges[i].start_range)) {
    141       return OTS_FAILURE();
    142     }
    143   }
    144   for (unsigned i = 0; i < segcount; ++i) {
    145     if (!subtable.ReadS16(&ranges[i].id_delta)) {
    146       return OTS_FAILURE();
    147     }
    148   }
    149   for (unsigned i = 0; i < segcount; ++i) {
    150     ranges[i].id_range_offset_offset = subtable.offset();
    151     if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
    152       return OTS_FAILURE();
    153     }
    154 
    155     if (ranges[i].id_range_offset & 1) {
    156       // Some font generators seem to put 65535 on id_range_offset
    157       // for 0xFFFF-0xFFFF range.
    158       // (e.g., many fonts in http://www.princexml.com/fonts/)
    159       if (i == segcount - 1u) {
    160         OTS_WARNING("bad id_range_offset");
    161         ranges[i].id_range_offset = 0;
    162         // The id_range_offset value in the transcoded font will not change
    163         // since this table is not actually "transcoded" yet.
    164       } else {
    165         return OTS_FAILURE();
    166       }
    167     }
    168   }
    169 
    170   // ranges must be ascending order, based on the end_code. Ranges may not
    171   // overlap.
    172   for (unsigned i = 1; i < segcount; ++i) {
    173     if ((i == segcount - 1u) &&
    174         (ranges[i - 1].start_range == 0xffff) &&
    175         (ranges[i - 1].end_range == 0xffff) &&
    176         (ranges[i].start_range == 0xffff) &&
    177         (ranges[i].end_range == 0xffff)) {
    178       // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
    179       // We'll accept them as an exception.
    180       OTS_WARNING("multiple 0xffff terminators found");
    181       continue;
    182     }
    183 
    184     // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
    185     // unsorted table...
    186     if (ranges[i].end_range <= ranges[i - 1].end_range) {
    187       return OTS_FAILURE();
    188     }
    189     if (ranges[i].start_range <= ranges[i - 1].end_range) {
    190       return OTS_FAILURE();
    191     }
    192 
    193     // On many fonts, the value of {first, last}_char_index are incorrect.
    194     // Fix them.
    195     if (file->os2->first_char_index != 0xFFFF &&
    196         ranges[i].start_range != 0xFFFF &&
    197         file->os2->first_char_index > ranges[i].start_range) {
    198       file->os2->first_char_index = ranges[i].start_range;
    199     }
    200     if (file->os2->last_char_index != 0xFFFF &&
    201         ranges[i].end_range != 0xFFFF &&
    202         file->os2->last_char_index < ranges[i].end_range) {
    203       file->os2->last_char_index = ranges[i].end_range;
    204     }
    205   }
    206 
    207   // The last range must end at 0xffff
    208   if (ranges[segcount - 1].end_range != 0xffff) {
    209     return OTS_FAILURE();
    210   }
    211 
    212   // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
    213   // each code-point defined in the table and make sure that they are all valid
    214   // glyphs and that we don't access anything out-of-bounds.
    215   for (unsigned i = 0; i < segcount; ++i) {
    216     for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
    217       const uint16_t code_point = cp;
    218       if (ranges[i].id_range_offset == 0) {
    219         // this is explictly allowed to overflow in the spec
    220         const uint16_t glyph = code_point + ranges[i].id_delta;
    221         if (glyph >= num_glyphs) {
    222           return OTS_FAILURE();
    223         }
    224       } else {
    225         const uint16_t range_delta = code_point - ranges[i].start_range;
    226         // this might seem odd, but it's true. The offset is relative to the
    227         // location of the offset value itself.
    228         const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
    229                                          ranges[i].id_range_offset +
    230                                          range_delta * 2;
    231         // We need to be able to access a 16-bit value from this offset
    232         if (glyph_id_offset + 1 >= length) {
    233           return OTS_FAILURE();
    234         }
    235         uint16_t glyph;
    236         std::memcpy(&glyph, data + glyph_id_offset, 2);
    237         glyph = ntohs(glyph);
    238         if (glyph >= num_glyphs) {
    239           return OTS_FAILURE();
    240         }
    241       }
    242     }
    243   }
    244 
    245   // We accept the table.
    246   // TODO(yusukes): transcode the subtable.
    247   if (platform == 3 && encoding == 0) {
    248     file->cmap->subtable_3_0_4_data = data;
    249     file->cmap->subtable_3_0_4_length = length;
    250   } else if (platform == 3 && encoding == 1) {
    251     file->cmap->subtable_3_1_4_data = data;
    252     file->cmap->subtable_3_1_4_length = length;
    253   } else if (platform == 0 && encoding == 3) {
    254     file->cmap->subtable_0_3_4_data = data;
    255     file->cmap->subtable_0_3_4_length = length;
    256   } else {
    257     return OTS_FAILURE();
    258   }
    259 
    260   return true;
    261 }
    262 
    263 bool Parse31012(ots::OpenTypeFile *file,
    264                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
    265   ots::Buffer subtable(data, length);
    266 
    267   // Format 12 tables are simple. We parse these and fully serialise them
    268   // later.
    269 
    270   if (!subtable.Skip(8)) {
    271     return OTS_FAILURE();
    272   }
    273   uint32_t language = 0;
    274   if (!subtable.ReadU32(&language)) {
    275     return OTS_FAILURE();
    276   }
    277   if (language) {
    278     return OTS_FAILURE();
    279   }
    280 
    281   uint32_t num_groups = 0;
    282   if (!subtable.ReadU32(&num_groups)) {
    283     return OTS_FAILURE();
    284   }
    285   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
    286     return OTS_FAILURE();
    287   }
    288 
    289   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
    290       = file->cmap->subtable_3_10_12;
    291   groups.resize(num_groups);
    292 
    293   for (unsigned i = 0; i < num_groups; ++i) {
    294     if (!subtable.ReadU32(&groups[i].start_range) ||
    295         !subtable.ReadU32(&groups[i].end_range) ||
    296         !subtable.ReadU32(&groups[i].start_glyph_id)) {
    297       return OTS_FAILURE();
    298     }
    299 
    300     if (groups[i].start_range > kUnicodeUpperLimit ||
    301         groups[i].end_range > kUnicodeUpperLimit ||
    302         groups[i].start_glyph_id > 0xFFFF) {
    303       return OTS_FAILURE();
    304     }
    305 
    306     // [0xD800, 0xDFFF] are surrogate code points.
    307     if (groups[i].start_range >= 0xD800 &&
    308         groups[i].start_range <= 0xDFFF) {
    309       return OTS_FAILURE();
    310     }
    311     if (groups[i].end_range >= 0xD800 &&
    312         groups[i].end_range <= 0xDFFF) {
    313       return OTS_FAILURE();
    314     }
    315     if (groups[i].start_range < 0xD800 &&
    316         groups[i].end_range > 0xDFFF) {
    317       return OTS_FAILURE();
    318     }
    319 
    320     // We assert that the glyph value is within range. Because of the range
    321     // limits, above, we don't need to worry about overflow.
    322     if (groups[i].end_range < groups[i].start_range) {
    323       return OTS_FAILURE();
    324     }
    325     if ((groups[i].end_range - groups[i].start_range) +
    326         groups[i].start_glyph_id > num_glyphs) {
    327       return OTS_FAILURE();
    328     }
    329   }
    330 
    331   // the groups must be sorted by start code and may not overlap
    332   for (unsigned i = 1; i < num_groups; ++i) {
    333     if (groups[i].start_range <= groups[i - 1].start_range) {
    334       return OTS_FAILURE();
    335     }
    336     if (groups[i].start_range <= groups[i - 1].end_range) {
    337       return OTS_FAILURE();
    338     }
    339   }
    340 
    341   return true;
    342 }
    343 
    344 bool Parse31013(ots::OpenTypeFile *file,
    345                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
    346   ots::Buffer subtable(data, length);
    347 
    348   // Format 13 tables are simple. We parse these and fully serialise them
    349   // later.
    350 
    351   if (!subtable.Skip(8)) {
    352     return OTS_FAILURE();
    353   }
    354   uint16_t language = 0;
    355   if (!subtable.ReadU16(&language)) {
    356     return OTS_FAILURE();
    357   }
    358   if (language) {
    359     return OTS_FAILURE();
    360   }
    361 
    362   uint32_t num_groups = 0;
    363   if (!subtable.ReadU32(&num_groups)) {
    364     return OTS_FAILURE();
    365   }
    366 
    367   // We limit the number of groups in the same way as in 3.10.12 tables. See
    368   // the comment there in
    369   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
    370     return OTS_FAILURE();
    371   }
    372 
    373   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
    374       = file->cmap->subtable_3_10_13;
    375   groups.resize(num_groups);
    376 
    377   for (unsigned i = 0; i < num_groups; ++i) {
    378     if (!subtable.ReadU32(&groups[i].start_range) ||
    379         !subtable.ReadU32(&groups[i].end_range) ||
    380         !subtable.ReadU32(&groups[i].start_glyph_id)) {
    381       return OTS_FAILURE();
    382     }
    383 
    384     // We conservatively limit all of the values to protect some parsers from
    385     // overflows
    386     if (groups[i].start_range > kUnicodeUpperLimit ||
    387         groups[i].end_range > kUnicodeUpperLimit ||
    388         groups[i].start_glyph_id > 0xFFFF) {
    389       return OTS_FAILURE();
    390     }
    391 
    392     if (groups[i].start_glyph_id >= num_glyphs) {
    393       return OTS_FAILURE();
    394     }
    395   }
    396 
    397   // the groups must be sorted by start code and may not overlap
    398   for (unsigned i = 1; i < num_groups; ++i) {
    399     if (groups[i].start_range <= groups[i - 1].start_range) {
    400       return OTS_FAILURE();
    401     }
    402     if (groups[i].start_range <= groups[i - 1].end_range) {
    403       return OTS_FAILURE();
    404     }
    405   }
    406 
    407   return true;
    408 }
    409 
    410 bool Parse0514(ots::OpenTypeFile *file,
    411                const uint8_t *data, size_t length, uint16_t num_glyphs) {
    412   // Unicode Variation Selector table
    413   ots::Buffer subtable(data, length);
    414 
    415   // Format 14 tables are simple. We parse these and fully serialise them
    416   // later.
    417 
    418   // Skip format (USHORT) and length (ULONG)
    419   if (!subtable.Skip(6)) {
    420     return OTS_FAILURE();
    421   }
    422 
    423   uint32_t num_records = 0;
    424   if (!subtable.ReadU32(&num_records)) {
    425     return OTS_FAILURE();
    426   }
    427   if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
    428     return OTS_FAILURE();
    429   }
    430 
    431   std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
    432       = file->cmap->subtable_0_5_14;
    433   records.resize(num_records);
    434 
    435   for (unsigned i = 0; i < num_records; ++i) {
    436     if (!subtable.ReadU24(&records[i].var_selector) ||
    437         !subtable.ReadU32(&records[i].default_offset) ||
    438         !subtable.ReadU32(&records[i].non_default_offset)) {
    439       return OTS_FAILURE();
    440     }
    441     // Checks the value of variation selector
    442     if (!((records[i].var_selector >= kMongolianVSStart &&
    443            records[i].var_selector <= kMongolianVSEnd) ||
    444           (records[i].var_selector >= kVSStart &&
    445            records[i].var_selector <= kVSEnd) ||
    446           (records[i].var_selector >= kIVSStart &&
    447            records[i].var_selector <= kIVSEnd))) {
    448       return OTS_FAILURE();
    449     }
    450     if (i > 0 &&
    451         records[i-1].var_selector >= records[i].var_selector) {
    452       return OTS_FAILURE();
    453     }
    454 
    455     // Checks offsets
    456     if (!records[i].default_offset && !records[i].non_default_offset) {
    457       return OTS_FAILURE();
    458     }
    459     if (records[i].default_offset &&
    460         records[i].default_offset >= length) {
    461       return OTS_FAILURE();
    462     }
    463     if (records[i].non_default_offset &&
    464         records[i].non_default_offset >= length) {
    465       return OTS_FAILURE();
    466     }
    467   }
    468 
    469   for (unsigned i = 0; i < num_records; ++i) {
    470     // Checks default UVS table
    471     if (records[i].default_offset) {
    472       subtable.set_offset(records[i].default_offset);
    473       uint32_t num_ranges = 0;
    474       if (!subtable.ReadU32(&num_ranges)) {
    475         return OTS_FAILURE();
    476       }
    477       if (!num_ranges || num_ranges > kMaxCMAPGroups) {
    478         return OTS_FAILURE();
    479       }
    480 
    481       uint32_t last_unicode_value = 0;
    482       std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
    483           = records[i].ranges;
    484       ranges.resize(num_ranges);
    485 
    486       for (unsigned j = 0; j < num_ranges; ++j) {
    487         if (!subtable.ReadU24(&ranges[j].unicode_value) ||
    488             !subtable.ReadU8(&ranges[j].additional_count)) {
    489           return OTS_FAILURE();
    490         }
    491         const uint32_t check_value =
    492             ranges[j].unicode_value + ranges[j].additional_count;
    493         if (ranges[j].unicode_value == 0 ||
    494             ranges[j].unicode_value > kUnicodeUpperLimit ||
    495             check_value > kUVSUpperLimit ||
    496             (last_unicode_value &&
    497              ranges[j].unicode_value <= last_unicode_value)) {
    498           return OTS_FAILURE();
    499         }
    500         last_unicode_value = check_value;
    501       }
    502     }
    503 
    504     // Checks non default UVS table
    505     if (records[i].non_default_offset) {
    506       subtable.set_offset(records[i].non_default_offset);
    507       uint32_t num_mappings = 0;
    508       if (!subtable.ReadU32(&num_mappings)) {
    509         return OTS_FAILURE();
    510       }
    511       if (!num_mappings || num_mappings > kMaxCMAPGroups) {
    512         return OTS_FAILURE();
    513       }
    514 
    515       uint32_t last_unicode_value = 0;
    516       std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
    517           = records[i].mappings;
    518       mappings.resize(num_mappings);
    519 
    520       for (unsigned j = 0; j < num_mappings; ++j) {
    521         if (!subtable.ReadU24(&mappings[j].unicode_value) ||
    522             !subtable.ReadU16(&mappings[j].glyph_id)) {
    523           return OTS_FAILURE();
    524         }
    525         if (mappings[j].glyph_id == 0 ||
    526             mappings[j].unicode_value == 0 ||
    527             mappings[j].unicode_value > kUnicodeUpperLimit ||
    528             (last_unicode_value &&
    529              mappings[j].unicode_value <= last_unicode_value)) {
    530           return OTS_FAILURE();
    531         }
    532         last_unicode_value = mappings[j].unicode_value;
    533       }
    534     }
    535   }
    536 
    537   if (subtable.offset() != length) {
    538     return OTS_FAILURE();
    539   }
    540   file->cmap->subtable_0_5_14_length = subtable.offset();
    541   return true;
    542 }
    543 
    544 bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
    545   // Mac Roman table
    546   ots::Buffer subtable(data, length);
    547 
    548   if (!subtable.Skip(4)) {
    549     return OTS_FAILURE();
    550   }
    551   uint16_t language = 0;
    552   if (!subtable.ReadU16(&language)) {
    553     return OTS_FAILURE();
    554   }
    555   if (language) {
    556     // simsun.ttf has non-zero language id.
    557     OTS_WARNING("language id should be zero: %u", language);
    558   }
    559 
    560   file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
    561   for (size_t i = 0; i < kFormat0ArraySize; ++i) {
    562     uint8_t glyph_id = 0;
    563     if (!subtable.ReadU8(&glyph_id)) {
    564       return OTS_FAILURE();
    565     }
    566     file->cmap->subtable_1_0_0.push_back(glyph_id);
    567   }
    568 
    569   return true;
    570 }
    571 
    572 }  // namespace
    573 
    574 namespace ots {
    575 
    576 bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
    577   Buffer table(data, length);
    578   file->cmap = new OpenTypeCMAP;
    579 
    580   uint16_t version = 0;
    581   uint16_t num_tables = 0;
    582   if (!table.ReadU16(&version) ||
    583       !table.ReadU16(&num_tables)) {
    584     return OTS_FAILURE();
    585   }
    586 
    587   if (version != 0) {
    588     return OTS_FAILURE();
    589   }
    590   if (!num_tables) {
    591     return OTS_FAILURE();
    592   }
    593 
    594   std::vector<CMAPSubtableHeader> subtable_headers;
    595 
    596   // read the subtable headers
    597   subtable_headers.reserve(num_tables);
    598   for (unsigned i = 0; i < num_tables; ++i) {
    599     CMAPSubtableHeader subt;
    600 
    601     if (!table.ReadU16(&subt.platform) ||
    602         !table.ReadU16(&subt.encoding) ||
    603         !table.ReadU32(&subt.offset)) {
    604       return OTS_FAILURE();
    605     }
    606 
    607     subtable_headers.push_back(subt);
    608   }
    609 
    610   const size_t data_offset = table.offset();
    611 
    612   // make sure that all the offsets are valid.
    613   uint32_t last_id = 0;
    614   for (unsigned i = 0; i < num_tables; ++i) {
    615     if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
    616       return OTS_FAILURE();
    617     }
    618     if (subtable_headers[i].offset < data_offset ||
    619         subtable_headers[i].offset >= length) {
    620       return OTS_FAILURE();
    621     }
    622 
    623     // check if the table is sorted first by platform ID, then by encoding ID.
    624     uint32_t current_id
    625         = (subtable_headers[i].platform << 16) + subtable_headers[i].encoding;
    626     if ((i != 0) && (last_id >= current_id)) {
    627       return OTS_FAILURE();
    628     }
    629     last_id = current_id;
    630   }
    631 
    632   // the format of the table is the first couple of bytes in the table. The
    633   // length of the table is stored in a format-specific way.
    634   for (unsigned i = 0; i < num_tables; ++i) {
    635     table.set_offset(subtable_headers[i].offset);
    636     if (!table.ReadU16(&subtable_headers[i].format)) {
    637       return OTS_FAILURE();
    638     }
    639 
    640     uint16_t len = 0;
    641     switch (subtable_headers[i].format) {
    642       case 0:
    643       case 4:
    644         if (!table.ReadU16(&len)) {
    645           return OTS_FAILURE();
    646         }
    647         subtable_headers[i].length = len;
    648         break;
    649       case 12:
    650       case 13:
    651         if (!table.Skip(2)) {
    652           return OTS_FAILURE();
    653         }
    654         if (!table.ReadU32(&subtable_headers[i].length)) {
    655           return OTS_FAILURE();
    656         }
    657         break;
    658       case 14:
    659         if (!table.ReadU32(&subtable_headers[i].length)) {
    660           return OTS_FAILURE();
    661         }
    662         break;
    663       default:
    664         subtable_headers[i].length = 0;
    665         break;
    666     }
    667   }
    668 
    669   // Now, verify that all the lengths are sane
    670   for (unsigned i = 0; i < num_tables; ++i) {
    671     if (!subtable_headers[i].length) continue;
    672     if (subtable_headers[i].length > 1024 * 1024 * 1024) {
    673       return OTS_FAILURE();
    674     }
    675     // We know that both the offset and length are < 1GB, so the following
    676     // addition doesn't overflow
    677     const uint32_t end_byte
    678         = subtable_headers[i].offset + subtable_headers[i].length;
    679     if (end_byte > length) {
    680       return OTS_FAILURE();
    681     }
    682   }
    683 
    684   // check that the cmap subtables are not overlapping.
    685   std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
    686   std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
    687   for (unsigned i = 0; i < num_tables; ++i) {
    688     const uint32_t end_byte
    689         = subtable_headers[i].offset + subtable_headers[i].length;
    690 
    691     if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
    692                                             end_byte)).second) {
    693       // Sometimes Unicode table and MS table share exactly the same data.
    694       // We'll allow this.
    695       continue;
    696     }
    697     overlap_checker.push_back(
    698         std::make_pair(subtable_headers[i].offset,
    699                        static_cast<uint8_t>(1) /* start */));
    700     overlap_checker.push_back(
    701         std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
    702   }
    703   std::sort(overlap_checker.begin(), overlap_checker.end());
    704   int overlap_count = 0;
    705   for (unsigned i = 0; i < overlap_checker.size(); ++i) {
    706     overlap_count += (overlap_checker[i].second ? 1 : -1);
    707     if (overlap_count > 1) {
    708       return OTS_FAILURE();
    709     }
    710   }
    711 
    712   // we grab the number of glyphs in the file from the maxp table to make sure
    713   // that the character map isn't referencing anything beyound this range.
    714   if (!file->maxp) {
    715     return OTS_FAILURE();
    716   }
    717   const uint16_t num_glyphs = file->maxp->num_glyphs;
    718 
    719   // We only support a subset of the possible character map tables. Microsoft
    720   // 'strongly recommends' that everyone supports the Unicode BMP table with
    721   // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
    722   //   Platform ID   Encoding ID  Format
    723   //   0             0            4       (Unicode Default)
    724   //   0             3            4       (Unicode BMP)
    725   //   0             3            12      (Unicode UCS-4)
    726   //   0             5            14      (Unicode Variation Sequences)
    727   //   1             0            0       (Mac Roman)
    728   //   3             0            4       (MS Symbol)
    729   //   3             1            4       (MS Unicode BMP)
    730   //   3             10           12      (MS Unicode UCS-4)
    731   //   3             10           13      (MS UCS-4 Fallback mapping)
    732   //
    733   // Note:
    734   //  * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table
    735   //    also exists, the 0-0-4 table is ignored.
    736   //  * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
    737   //    Some fonts which include 0-5-14 table seems to be required 0-3-4
    738   //    table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists.
    739   //  * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
    740   //    exists, the 0-3-12 table is ignored.
    741   //
    742 
    743   for (unsigned i = 0; i < num_tables; ++i) {
    744     if (subtable_headers[i].platform == 0) {
    745       // Unicode platform
    746 
    747       if ((subtable_headers[i].encoding == 0) &&
    748           (subtable_headers[i].format == 4)) {
    749         // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4
    750         // table actually points to MS symbol data and thus should be parsed as
    751         // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
    752         // recovered in ots_cmap_serialise().
    753         if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
    754                       subtable_headers[i].length, num_glyphs)) {
    755           return OTS_FAILURE();
    756         }
    757       } else if ((subtable_headers[i].encoding == 3) &&
    758                  (subtable_headers[i].format == 4)) {
    759         // parse and output the 0-3-4 table as 0-3-4 table.
    760         if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
    761                       subtable_headers[i].length, num_glyphs)) {
    762           return OTS_FAILURE();
    763         }
    764       } else if ((subtable_headers[i].encoding == 3) &&
    765                  (subtable_headers[i].format == 12)) {
    766         // parse and output the 0-3-12 table as 3-10-12 table.
    767         if (!Parse31012(file, data + subtable_headers[i].offset,
    768                         subtable_headers[i].length, num_glyphs)) {
    769           return OTS_FAILURE();
    770         }
    771       } else if ((subtable_headers[i].encoding == 5) &&
    772                  (subtable_headers[i].format == 14)) {
    773         if (!Parse0514(file, data + subtable_headers[i].offset,
    774                        subtable_headers[i].length, num_glyphs)) {
    775           return OTS_FAILURE();
    776         }
    777       }
    778     } else if (subtable_headers[i].platform == 1) {
    779       // Mac platform
    780 
    781       if ((subtable_headers[i].encoding == 0) &&
    782           (subtable_headers[i].format == 0)) {
    783         // parse and output the 1-0-0 table.
    784         if (!Parse100(file, data + subtable_headers[i].offset,
    785                       subtable_headers[i].length)) {
    786           return OTS_FAILURE();
    787         }
    788       }
    789     } else if (subtable_headers[i].platform == 3) {
    790       // MS platform
    791 
    792       switch (subtable_headers[i].encoding) {
    793         case 0:
    794         case 1:
    795           if (subtable_headers[i].format == 4) {
    796             // parse 3-0-4 or 3-1-4 table.
    797             if (!ParseFormat4(file, subtable_headers[i].platform,
    798                           subtable_headers[i].encoding,
    799                           data + subtable_headers[i].offset,
    800                           subtable_headers[i].length, num_glyphs)) {
    801               return OTS_FAILURE();
    802             }
    803           }
    804           break;
    805         case 10:
    806           if (subtable_headers[i].format == 12) {
    807             file->cmap->subtable_3_10_12.clear();
    808             if (!Parse31012(file, data + subtable_headers[i].offset,
    809                             subtable_headers[i].length, num_glyphs)) {
    810               return OTS_FAILURE();
    811             }
    812           } else if (subtable_headers[i].format == 13) {
    813             file->cmap->subtable_3_10_13.clear();
    814             if (!Parse31013(file, data + subtable_headers[i].offset,
    815                             subtable_headers[i].length, num_glyphs)) {
    816               return OTS_FAILURE();
    817             }
    818           }
    819           break;
    820       }
    821     }
    822   }
    823 
    824   return true;
    825 }
    826 
    827 bool ots_cmap_should_serialise(OpenTypeFile *file) {
    828   return file->cmap != NULL;
    829 }
    830 
    831 bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
    832   const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
    833   const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
    834   const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
    835   const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
    836   // MS Symbol and MS Unicode tables should not co-exist.
    837   // See the comment above in 0-0-4 parser.
    838   const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
    839   const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
    840   const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
    841   const unsigned num_subtables = static_cast<unsigned>(have_034) +
    842                                  static_cast<unsigned>(have_0514) +
    843                                  static_cast<unsigned>(have_100) +
    844                                  static_cast<unsigned>(have_304) +
    845                                  static_cast<unsigned>(have_314) +
    846                                  static_cast<unsigned>(have_31012) +
    847                                  static_cast<unsigned>(have_31013);
    848   const off_t table_start = out->Tell();
    849 
    850   // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
    851   // (e.g., old fonts for Mac). We don't support them.
    852   if (!have_304 && !have_314 && !have_034) {
    853     return OTS_FAILURE();
    854   }
    855 
    856   if (!out->WriteU16(0) ||
    857       !out->WriteU16(num_subtables)) {
    858     return OTS_FAILURE();
    859   }
    860 
    861   const off_t record_offset = out->Tell();
    862   if (!out->Pad(num_subtables * 8)) {
    863     return OTS_FAILURE();
    864   }
    865 
    866   const off_t offset_034 = out->Tell();
    867   if (have_034) {
    868     if (!out->Write(file->cmap->subtable_0_3_4_data,
    869                     file->cmap->subtable_0_3_4_length)) {
    870       return OTS_FAILURE();
    871     }
    872   }
    873 
    874   const off_t offset_0514 = out->Tell();
    875   if (have_0514) {
    876     const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
    877         = file->cmap->subtable_0_5_14;
    878     const unsigned num_records = records.size();
    879     if (!out->WriteU16(14) ||
    880         !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
    881         !out->WriteU32(num_records)) {
    882       return OTS_FAILURE();
    883     }
    884     for (unsigned i = 0; i < num_records; ++i) {
    885       if (!out->WriteU24(records[i].var_selector) ||
    886           !out->WriteU32(records[i].default_offset) ||
    887           !out->WriteU32(records[i].non_default_offset)) {
    888         return OTS_FAILURE();
    889       }
    890     }
    891     for (unsigned i = 0; i < num_records; ++i) {
    892       if (records[i].default_offset) {
    893         const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
    894             = records[i].ranges;
    895         const unsigned num_ranges = ranges.size();
    896         if (!out->Seek(records[i].default_offset + offset_0514) ||
    897             !out->WriteU32(num_ranges)) {
    898           return OTS_FAILURE();
    899         }
    900         for (unsigned j = 0; j < num_ranges; ++j) {
    901           if (!out->WriteU24(ranges[j].unicode_value) ||
    902               !out->WriteU8(ranges[j].additional_count)) {
    903             return OTS_FAILURE();
    904           }
    905         }
    906       }
    907       if (records[i].non_default_offset) {
    908         const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
    909             = records[i].mappings;
    910         const unsigned num_mappings = mappings.size();
    911         if (!out->Seek(records[i].non_default_offset + offset_0514) ||
    912             !out->WriteU32(num_mappings)) {
    913           return OTS_FAILURE();
    914         }
    915         for (unsigned j = 0; j < num_mappings; ++j) {
    916           if (!out->WriteU24(mappings[j].unicode_value) ||
    917               !out->WriteU16(mappings[j].glyph_id)) {
    918             return OTS_FAILURE();
    919           }
    920         }
    921       }
    922     }
    923   }
    924 
    925   const off_t offset_100 = out->Tell();
    926   if (have_100) {
    927     if (!out->WriteU16(0) ||  // format
    928         !out->WriteU16(6 + kFormat0ArraySize) ||  // length
    929         !out->WriteU16(0)) {  // language
    930       return OTS_FAILURE();
    931     }
    932     if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
    933       return OTS_FAILURE();
    934     }
    935   }
    936 
    937   const off_t offset_304 = out->Tell();
    938   if (have_304) {
    939     if (!out->Write(file->cmap->subtable_3_0_4_data,
    940                     file->cmap->subtable_3_0_4_length)) {
    941       return OTS_FAILURE();
    942     }
    943   }
    944 
    945   const off_t offset_314 = out->Tell();
    946   if (have_314) {
    947     if (!out->Write(file->cmap->subtable_3_1_4_data,
    948                     file->cmap->subtable_3_1_4_length)) {
    949       return OTS_FAILURE();
    950     }
    951   }
    952 
    953   const off_t offset_31012 = out->Tell();
    954   if (have_31012) {
    955     std::vector<OpenTypeCMAPSubtableRange> &groups
    956         = file->cmap->subtable_3_10_12;
    957     const unsigned num_groups = groups.size();
    958     if (!out->WriteU16(12) ||
    959         !out->WriteU16(0) ||
    960         !out->WriteU32(num_groups * 12 + 16) ||
    961         !out->WriteU32(0) ||
    962         !out->WriteU32(num_groups)) {
    963       return OTS_FAILURE();
    964     }
    965 
    966     for (unsigned i = 0; i < num_groups; ++i) {
    967       if (!out->WriteU32(groups[i].start_range) ||
    968           !out->WriteU32(groups[i].end_range) ||
    969           !out->WriteU32(groups[i].start_glyph_id)) {
    970         return OTS_FAILURE();
    971       }
    972     }
    973   }
    974 
    975   const off_t offset_31013 = out->Tell();
    976   if (have_31013) {
    977     std::vector<OpenTypeCMAPSubtableRange> &groups
    978         = file->cmap->subtable_3_10_13;
    979     const unsigned num_groups = groups.size();
    980     if (!out->WriteU16(13) ||
    981         !out->WriteU16(0) ||
    982         !out->WriteU32(num_groups * 12 + 14) ||
    983         !out->WriteU32(0) ||
    984         !out->WriteU32(num_groups)) {
    985       return OTS_FAILURE();
    986     }
    987 
    988     for (unsigned i = 0; i < num_groups; ++i) {
    989       if (!out->WriteU32(groups[i].start_range) ||
    990           !out->WriteU32(groups[i].end_range) ||
    991           !out->WriteU32(groups[i].start_glyph_id)) {
    992         return OTS_FAILURE();
    993       }
    994     }
    995   }
    996 
    997   const off_t table_end = out->Tell();
    998   // We might have hanging bytes from the above's checksum which the OTSStream
    999   // then merges into the table of offsets.
   1000   OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
   1001   out->ResetChecksum();
   1002 
   1003   // Now seek back and write the table of offsets
   1004   if (!out->Seek(record_offset)) {
   1005     return OTS_FAILURE();
   1006   }
   1007 
   1008   if (have_034) {
   1009     if (!out->WriteU16(0) ||
   1010         !out->WriteU16(3) ||
   1011         !out->WriteU32(offset_034 - table_start)) {
   1012       return OTS_FAILURE();
   1013     }
   1014   }
   1015 
   1016   if (have_0514) {
   1017     if (!out->WriteU16(0) ||
   1018         !out->WriteU16(5) ||
   1019         !out->WriteU32(offset_0514 - table_start)) {
   1020       return OTS_FAILURE();
   1021     }
   1022   }
   1023 
   1024   if (have_100) {
   1025     if (!out->WriteU16(1) ||
   1026         !out->WriteU16(0) ||
   1027         !out->WriteU32(offset_100 - table_start)) {
   1028       return OTS_FAILURE();
   1029     }
   1030   }
   1031 
   1032   if (have_304) {
   1033     if (!out->WriteU16(3) ||
   1034         !out->WriteU16(0) ||
   1035         !out->WriteU32(offset_304 - table_start)) {
   1036       return OTS_FAILURE();
   1037     }
   1038   }
   1039 
   1040   if (have_314) {
   1041     if (!out->WriteU16(3) ||
   1042         !out->WriteU16(1) ||
   1043         !out->WriteU32(offset_314 - table_start)) {
   1044       return OTS_FAILURE();
   1045     }
   1046   }
   1047 
   1048   if (have_31012) {
   1049     if (!out->WriteU16(3) ||
   1050         !out->WriteU16(10) ||
   1051         !out->WriteU32(offset_31012 - table_start)) {
   1052       return OTS_FAILURE();
   1053     }
   1054   }
   1055 
   1056   if (have_31013) {
   1057     if (!out->WriteU16(3) ||
   1058         !out->WriteU16(10) ||
   1059         !out->WriteU32(offset_31013 - table_start)) {
   1060       return OTS_FAILURE();
   1061     }
   1062   }
   1063 
   1064   if (!out->Seek(table_end)) {
   1065     return OTS_FAILURE();
   1066   }
   1067   out->RestoreChecksum(saved_checksum);
   1068 
   1069   return true;
   1070 }
   1071 
   1072 void ots_cmap_free(OpenTypeFile *file) {
   1073   delete file->cmap;
   1074 }
   1075 
   1076 }  // namespace ots
   1077