Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2011 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkPDFMakeToUnicodeCmap.h"
      9 
     10 #include "SkPDFUtils.h"
     11 #include "SkTo.h"
     12 #include "SkUTF.h"
     13 
     14 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
     15                                     bool multibyte) {
     16     // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
     17     // It's there to prevent old version Adobe Readers from malfunctioning.
     18     const char* kHeader =
     19         "/CIDInit /ProcSet findresource begin\n"
     20         "12 dict begin\n"
     21         "begincmap\n";
     22     cmap->writeText(kHeader);
     23 
     24     // The /CIDSystemInfo must be consistent to the one in
     25     // SkPDFFont::populateCIDFont().
     26     // We can not pass over the system info object here because the format is
     27     // different. This is not a reference object.
     28     const char* kSysInfo =
     29         "/CIDSystemInfo\n"
     30         "<<  /Registry (Adobe)\n"
     31         "/Ordering (UCS)\n"
     32         "/Supplement 0\n"
     33         ">> def\n";
     34     cmap->writeText(kSysInfo);
     35 
     36     // The CMapName must be consistent to /CIDSystemInfo above.
     37     // /CMapType 2 means ToUnicode.
     38     // Codespace range just tells the PDF processor the valid range.
     39     const char* kTypeInfoHeader =
     40         "/CMapName /Adobe-Identity-UCS def\n"
     41         "/CMapType 2 def\n"
     42         "1 begincodespacerange\n";
     43     cmap->writeText(kTypeInfoHeader);
     44     if (multibyte) {
     45         cmap->writeText("<0000> <FFFF>\n");
     46     } else {
     47         cmap->writeText("<00> <FF>\n");
     48     }
     49     cmap->writeText("endcodespacerange\n");
     50 }
     51 
     52 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
     53     const char kFooter[] =
     54         "endcmap\n"
     55         "CMapName currentdict /CMap defineresource pop\n"
     56         "end\n"
     57         "end";
     58     cmap->writeText(kFooter);
     59 }
     60 
namespace {
// One entry of a "beginbfchar" list: a single character code mapped to a
// single Unicode value.
struct BFChar {
    // Source character code as written to the CMap. Despite the name, this is
    // the code computed by SkPDFAppendCmapSections, which differs from the
    // glyph ID in single-byte mode (codes are rebased there).
    SkGlyphID fGlyphId;
    // Unicode value the code maps to.
    SkUnichar fUnicode;
};

// One entry of a "beginbfrange" list: the consecutive character codes
// fStart..fEnd (inclusive) mapped to consecutive Unicode values beginning at
// fUnicode.
struct BFRange {
    // First source character code of the run.
    SkGlyphID fStart;
    // Last source character code of the run (inclusive).
    SkGlyphID fEnd;
    // Unicode value that fStart maps to.
    SkUnichar fUnicode;
};
}  // namespace
     73 
     74 static void write_glyph(SkDynamicMemoryWStream* cmap,
     75                         bool multiByte,
     76                         SkGlyphID gid) {
     77     if (multiByte) {
     78         SkPDFUtils::WriteUInt16BE(cmap, gid);
     79     } else {
     80         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
     81     }
     82 }
     83 
     84 static void append_bfchar_section(const std::vector<BFChar>& bfchar,
     85                                   bool multiByte,
     86                                   SkDynamicMemoryWStream* cmap) {
     87     // PDF spec defines that every bf* list can have at most 100 entries.
     88     for (size_t i = 0; i < bfchar.size(); i += 100) {
     89         int count = SkToInt(bfchar.size() - i);
     90         count = SkMin32(count, 100);
     91         cmap->writeDecAsText(count);
     92         cmap->writeText(" beginbfchar\n");
     93         for (int j = 0; j < count; ++j) {
     94             cmap->writeText("<");
     95             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
     96             cmap->writeText("> <");
     97             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
     98             cmap->writeText(">\n");
     99         }
    100         cmap->writeText("endbfchar\n");
    101     }
    102 }
    103 
    104 static void append_bfrange_section(const std::vector<BFRange>& bfrange,
    105                                    bool multiByte,
    106                                    SkDynamicMemoryWStream* cmap) {
    107     // PDF spec defines that every bf* list can have at most 100 entries.
    108     for (size_t i = 0; i < bfrange.size(); i += 100) {
    109         int count = SkToInt(bfrange.size() - i);
    110         count = SkMin32(count, 100);
    111         cmap->writeDecAsText(count);
    112         cmap->writeText(" beginbfrange\n");
    113         for (int j = 0; j < count; ++j) {
    114             cmap->writeText("<");
    115             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
    116             cmap->writeText("> <");
    117             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
    118             cmap->writeText("> <");
    119             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
    120             cmap->writeText(">\n");
    121         }
    122         cmap->writeText("endbfrange\n");
    123     }
    124 }
    125 
    126 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
    127 // Technote 5014.
    128 // The function is not static so we can test it in unit tests.
    129 //
    130 // Current implementation guarantees bfchar and bfrange entries do not overlap.
    131 //
    132 // Current implementation does not attempt aggressive optimizations against
    133 // following case because the specification is not clear.
    134 //
    135 // 4 beginbfchar          1 beginbfchar
    136 // <0003> <0013>          <0020> <0014>
    137 // <0005> <0015>    to    endbfchar
    138 // <0007> <0017>          1 beginbfrange
    139 // <0020> <0014>          <0003> <0007> <0013>
    140 // endbfchar              endbfrange
    141 //
    142 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
    143 // overlap, but succeeding maps supersede preceding maps."
    144 //
    145 // In case of searching text in PDF, bfrange will have higher precedence so
    146 // typing char id 0x0014 in search box will get glyph id 0x0004 first.  However,
    147 // the spec does not mention how will this kind of conflict being resolved.
    148 //
    149 // For the worst case (having 65536 continuous unicode and we use every other
    150 // one of them), the possible savings by aggressive optimization is 416KB
    151 // pre-compressed and does not provide enough motivation for implementation.
    152 void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
    153                              const SkPDFGlyphUse* subset,
    154                              SkDynamicMemoryWStream* cmap,
    155                              bool multiByteGlyphs,
    156                              SkGlyphID firstGlyphID,
    157                              SkGlyphID lastGlyphID) {
    158     int glyphOffset = 0;
    159     if (!multiByteGlyphs) {
    160         glyphOffset = firstGlyphID - 1;
    161     }
    162 
    163     std::vector<BFChar> bfcharEntries;
    164     std::vector<BFRange> bfrangeEntries;
    165 
    166     BFRange currentRangeEntry = {0, 0, 0};
    167     bool rangeEmpty = true;
    168     const int limit = (int)lastGlyphID + 1 - glyphOffset;
    169 
    170     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
    171         SkGlyphID gid = i + glyphOffset;
    172         bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
    173         if (!rangeEmpty) {
    174             // PDF spec requires bfrange not changing the higher byte,
    175             // e.g. <1035> <10FF> <2222> is ok, but
    176             //      <1035> <1100> <2222> is no good
    177             bool inRange =
    178                 i == currentRangeEntry.fEnd + 1 &&
    179                 i >> 8 == currentRangeEntry.fStart >> 8 &&
    180                 i < limit &&
    181                 glyphToUnicode[gid] ==
    182                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
    183             if (!inSubset || !inRange) {
    184                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
    185                     bfrangeEntries.push_back(currentRangeEntry);
    186                 } else {
    187                     bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
    188                 }
    189                 rangeEmpty = true;
    190             }
    191         }
    192         if (inSubset) {
    193             currentRangeEntry.fEnd = i;
    194             if (rangeEmpty) {
    195               currentRangeEntry.fStart = i;
    196               currentRangeEntry.fUnicode = glyphToUnicode[gid];
    197               rangeEmpty = false;
    198             }
    199         }
    200     }
    201 
    202     // The spec requires all bfchar entries for a font must come before bfrange
    203     // entries.
    204     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
    205     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
    206 }
    207 
    208 std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
    209         const SkUnichar* glyphToUnicode,
    210         const SkPDFGlyphUse* subset,
    211         bool multiByteGlyphs,
    212         SkGlyphID firstGlyphID,
    213         SkGlyphID lastGlyphID) {
    214     SkDynamicMemoryWStream cmap;
    215     append_tounicode_header(&cmap, multiByteGlyphs);
    216     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
    217                             firstGlyphID, lastGlyphID);
    218     append_cmap_footer(&cmap);
    219     return cmap.detachAsStream();
    220 }
    221