Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2011 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkPDFMakeToUnicodeCmap.h"
      9 #include "SkPDFUtils.h"
     10 #include "SkUtils.h"
     11 
     12 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
     13                                     bool multibyte) {
     14     // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
     15     // It's there to prevent old version Adobe Readers from malfunctioning.
     16     const char* kHeader =
     17         "/CIDInit /ProcSet findresource begin\n"
     18         "12 dict begin\n"
     19         "begincmap\n";
     20     cmap->writeText(kHeader);
     21 
     22     // The /CIDSystemInfo must be consistent to the one in
     23     // SkPDFFont::populateCIDFont().
     24     // We can not pass over the system info object here because the format is
     25     // different. This is not a reference object.
     26     const char* kSysInfo =
     27         "/CIDSystemInfo\n"
     28         "<<  /Registry (Adobe)\n"
     29         "/Ordering (UCS)\n"
     30         "/Supplement 0\n"
     31         ">> def\n";
     32     cmap->writeText(kSysInfo);
     33 
     34     // The CMapName must be consistent to /CIDSystemInfo above.
     35     // /CMapType 2 means ToUnicode.
     36     // Codespace range just tells the PDF processor the valid range.
     37     const char* kTypeInfoHeader =
     38         "/CMapName /Adobe-Identity-UCS def\n"
     39         "/CMapType 2 def\n"
     40         "1 begincodespacerange\n";
     41     cmap->writeText(kTypeInfoHeader);
     42     if (multibyte) {
     43         cmap->writeText("<0000> <FFFF>\n");
     44     } else {
     45         cmap->writeText("<00> <FF>\n");
     46     }
     47     cmap->writeText("endcodespacerange\n");
     48 }
     49 
     50 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
     51     const char kFooter[] =
     52         "endcmap\n"
     53         "CMapName currentdict /CMap defineresource pop\n"
     54         "end\n"
     55         "end";
     56     cmap->writeText(kFooter);
     57 }
     58 
     59 namespace {
     60 struct BFChar {
     61     SkGlyphID fGlyphId;
     62     SkUnichar fUnicode;
     63 };
     64 
     65 struct BFRange {
     66     SkGlyphID fStart;
     67     SkGlyphID fEnd;
     68     SkUnichar fUnicode;
     69 };
     70 }  // namespace
     71 
     72 static void write_glyph(SkDynamicMemoryWStream* cmap,
     73                         bool multiByte,
     74                         SkGlyphID gid) {
     75     if (multiByte) {
     76         SkPDFUtils::WriteUInt16BE(cmap, gid);
     77     } else {
     78         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
     79     }
     80 }
     81 
     82 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
     83                                   bool multiByte,
     84                                   SkDynamicMemoryWStream* cmap) {
     85     // PDF spec defines that every bf* list can have at most 100 entries.
     86     for (int i = 0; i < bfchar.count(); i += 100) {
     87         int count = bfchar.count() - i;
     88         count = SkMin32(count, 100);
     89         cmap->writeDecAsText(count);
     90         cmap->writeText(" beginbfchar\n");
     91         for (int j = 0; j < count; ++j) {
     92             cmap->writeText("<");
     93             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
     94             cmap->writeText("> <");
     95             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
     96             cmap->writeText(">\n");
     97         }
     98         cmap->writeText("endbfchar\n");
     99     }
    100 }
    101 
    102 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
    103                                    bool multiByte,
    104                                    SkDynamicMemoryWStream* cmap) {
    105     // PDF spec defines that every bf* list can have at most 100 entries.
    106     for (int i = 0; i < bfrange.count(); i += 100) {
    107         int count = bfrange.count() - i;
    108         count = SkMin32(count, 100);
    109         cmap->writeDecAsText(count);
    110         cmap->writeText(" beginbfrange\n");
    111         for (int j = 0; j < count; ++j) {
    112             cmap->writeText("<");
    113             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
    114             cmap->writeText("> <");
    115             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
    116             cmap->writeText("> <");
    117             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
    118             cmap->writeText(">\n");
    119         }
    120         cmap->writeText("endbfrange\n");
    121     }
    122 }
    123 
    124 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
    125 // Technote 5014.
    126 // The function is not static so we can test it in unit tests.
    127 //
    128 // Current implementation guarantees bfchar and bfrange entries do not overlap.
    129 //
    130 // Current implementation does not attempt aggresive optimizations against
    131 // following case because the specification is not clear.
    132 //
    133 // 4 beginbfchar          1 beginbfchar
    134 // <0003> <0013>          <0020> <0014>
    135 // <0005> <0015>    to    endbfchar
    136 // <0007> <0017>          1 beginbfrange
    137 // <0020> <0014>          <0003> <0007> <0013>
    138 // endbfchar              endbfrange
    139 //
    140 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
    141 // overlap, but succeeding maps supersede preceding maps."
    142 //
    143 // In case of searching text in PDF, bfrange will have higher precedence so
    144 // typing char id 0x0014 in search box will get glyph id 0x0004 first.  However,
    145 // the spec does not mention how will this kind of conflict being resolved.
    146 //
    147 // For the worst case (having 65536 continuous unicode and we use every other
    148 // one of them), the possible savings by aggressive optimization is 416KB
    149 // pre-compressed and does not provide enough motivation for implementation.
    150 void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
    151                              const SkBitSet* subset,
    152                              SkDynamicMemoryWStream* cmap,
    153                              bool multiByteGlyphs,
    154                              SkGlyphID firstGlyphID,
    155                              SkGlyphID lastGlyphID) {
    156     if (glyphToUnicode.isEmpty()) {
    157         return;
    158     }
    159     int glyphOffset = 0;
    160     if (!multiByteGlyphs) {
    161         glyphOffset = firstGlyphID - 1;
    162     }
    163 
    164     SkTDArray<BFChar> bfcharEntries;
    165     SkTDArray<BFRange> bfrangeEntries;
    166 
    167     BFRange currentRangeEntry = {0, 0, 0};
    168     bool rangeEmpty = true;
    169     const int limit =
    170             SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
    171 
    172     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
    173         bool inSubset = i < limit &&
    174                         (subset == nullptr || subset->has(i + glyphOffset));
    175         if (!rangeEmpty) {
    176             // PDF spec requires bfrange not changing the higher byte,
    177             // e.g. <1035> <10FF> <2222> is ok, but
    178             //      <1035> <1100> <2222> is no good
    179             bool inRange =
    180                 i == currentRangeEntry.fEnd + 1 &&
    181                 i >> 8 == currentRangeEntry.fStart >> 8 &&
    182                 i < limit &&
    183                 glyphToUnicode[i + glyphOffset] ==
    184                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
    185             if (!inSubset || !inRange) {
    186                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
    187                     bfrangeEntries.push(currentRangeEntry);
    188                 } else {
    189                     BFChar* entry = bfcharEntries.append();
    190                     entry->fGlyphId = currentRangeEntry.fStart;
    191                     entry->fUnicode = currentRangeEntry.fUnicode;
    192                 }
    193                 rangeEmpty = true;
    194             }
    195         }
    196         if (inSubset) {
    197             currentRangeEntry.fEnd = i;
    198             if (rangeEmpty) {
    199               currentRangeEntry.fStart = i;
    200               currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
    201               rangeEmpty = false;
    202             }
    203         }
    204     }
    205 
    206     // The spec requires all bfchar entries for a font must come before bfrange
    207     // entries.
    208     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
    209     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
    210 }
    211 
    212 sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
    213         const SkTDArray<SkUnichar>& glyphToUnicode,
    214         const SkBitSet* subset,
    215         bool multiByteGlyphs,
    216         SkGlyphID firstGlyphID,
    217         SkGlyphID lastGlyphID) {
    218     SkDynamicMemoryWStream cmap;
    219     append_tounicode_header(&cmap, multiByteGlyphs);
    220     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
    221                             firstGlyphID, lastGlyphID);
    222     append_cmap_footer(&cmap);
    223     return sk_make_sp<SkPDFStream>(
    224             std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
    225 }
    226