Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2015 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkPDFMetadata.h"
      9 #include "SkPDFTypes.h"
     10 #include <utility>
     11 
     12 #ifdef SK_PDF_GENERATE_PDFA
     13 #include "SkMD5.h"
     14 #endif
     15 
     16 static SkString pdf_date(const SkTime::DateTime& dt) {
     17     int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes);
     18     char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-';
     19     int timeZoneHours = SkTAbs(timeZoneMinutes) / 60;
     20     timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60;
     21     return SkStringPrintf(
     22             "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'",
     23             static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth),
     24             static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour),
     25             static_cast<unsigned>(dt.fMinute),
     26             static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours,
     27             timeZoneMinutes);
     28 }
     29 
     30 SkPDFObject* SkPDFMetadata::createDocumentInformationDict() const {
     31     SkAutoTUnref<SkPDFDict> dict(new SkPDFDict);
     32     static const char* keys[] = {
     33             "Title", "Author", "Subject", "Keywords", "Creator"};
     34     for (const char* key : keys) {
     35         for (const SkDocument::Attribute& keyValue : fInfo) {
     36             if (keyValue.fKey.equals(key)) {
     37                 dict->insertString(key, keyValue.fValue);
     38             }
     39         }
     40     }
     41     dict->insertString("Producer", "Skia/PDF");
     42     if (fCreation) {
     43         dict->insertString("CreationDate", pdf_date(*fCreation.get()));
     44     }
     45     if (fModified) {
     46         dict->insertString("ModDate", pdf_date(*fModified.get()));
     47     }
     48     return dict.detach();
     49 }
     50 
     51 #ifdef SK_PDF_GENERATE_PDFA
     52 SkPDFMetadata::UUID SkPDFMetadata::uuid() const {
     53     // The main requirement is for the UUID to be unique; the exact
     54     // format of the data that will be hashed is not important.
     55     SkMD5 md5;
     56     const char uuidNamespace[] = "org.skia.pdf\n";
     57     md5.write(uuidNamespace, strlen(uuidNamespace));
     58     SkMSec msec = SkTime::GetMSecs();
     59     md5.write(&msec, sizeof(msec));
     60     SkTime::DateTime dateTime;
     61     SkTime::GetDateTime(&dateTime);
     62     md5.write(&dateTime, sizeof(dateTime));
     63     if (fCreation) {
     64         md5.write(fCreation.get(), sizeof(fCreation));
     65     }
     66     if (fModified) {
     67         md5.write(fModified.get(), sizeof(fModified));
     68     }
     69     for (const auto& kv : fInfo) {
     70         md5.write(kv.fKey.c_str(), kv.fKey.size());
     71         md5.write("\037", 1);
     72         md5.write(kv.fValue.c_str(), kv.fValue.size());
     73         md5.write("\036", 1);
     74     }
     75     SkMD5::Digest digest;
     76     md5.finish(digest);
     77     // See RFC 4122, page 6-7.
     78     digest.data[6] = (digest.data[6] & 0x0F) | 0x30;
     79     digest.data[8] = (digest.data[6] & 0x3F) | 0x80;
     80     static_assert(sizeof(digest) == sizeof(UUID), "uuid_size");
     81     SkPDFMetadata::UUID uuid;
     82     memcpy(&uuid, &digest, sizeof(digest));
     83     return uuid;
     84 }
     85 
     86 SkPDFObject* SkPDFMetadata::CreatePdfId(const UUID& doc, const UUID& instance) {
     87     // /ID [ <81b14aafa313db63dbd6f981e49f94f4>
     88     //       <81b14aafa313db63dbd6f981e49f94f4> ]
     89     SkAutoTUnref<SkPDFArray> array(new SkPDFArray);
     90     static_assert(sizeof(UUID) == 16, "uuid_size");
     91     array->appendString(
     92             SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID)));
     93     array->appendString(
     94             SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID)));
     95     return array.detach();
     96 }
     97 
     98 // Improvement on SkStringPrintf to allow for arbitrarily long output.
     99 // TODO: replace SkStringPrintf.
    100 static SkString sk_string_printf(const char* format, ...) {
    101 #ifdef SK_BUILD_FOR_WIN
    102     va_list args;
    103     va_start(args, format);
    104     char buffer[1024];
    105     int length = _vsnprintf_s(buffer, sizeof(buffer), _TRUNCATE, format, args);
    106     va_end(args);
    107     if (length >= 0 && length < (int)sizeof(buffer)) {
    108         return SkString(buffer, length);
    109     }
    110     va_start(args, format);
    111     length = _vscprintf(format, args);
    112     va_end(args);
    113 
    114     SkString string((size_t)length);
    115     va_start(args, format);
    116     SkDEBUGCODE(int check = ) _vsnprintf_s(string.writable_str(), length + 1,
    117                                            _TRUNCATE, format, args);
    118     va_end(args);
    119     SkASSERT(check == length);
    120     SkASSERT(string[length] == '\0');
    121     return std::move(string);
    122 #else  // C99/C++11 standard vsnprintf
    123     // TODO: When all compilers support this, remove windows-specific code.
    124     va_list args;
    125     va_start(args, format);
    126     char buffer[1024];
    127     int length = vsnprintf(buffer, sizeof(buffer), format, args);
    128     va_end(args);
    129     if (length < 0) {
    130         return SkString();
    131     }
    132     if (length < (int)sizeof(buffer)) {
    133         return SkString(buffer, length);
    134     }
    135     SkString string((size_t)length);
    136     va_start(args, format);
    137     SkDEBUGCODE(int check = )
    138             vsnprintf(string.writable_str(), length + 1, format, args);
    139     va_end(args);
    140     SkASSERT(check == length);
    141     SkASSERT(string[length] == '\0');
    142     return std::move(string);
    143 #endif
    144 }
    145 
    146 static const SkString get(const SkTArray<SkDocument::Attribute>& info,
    147                           const char* key) {
    148     for (const auto& keyValue : info) {
    149         if (keyValue.fKey.equals(key)) {
    150             return keyValue.fValue;
    151         }
    152     }
    153     return SkString();
    154 }
    155 
    156 #define HEXIFY(INPUT_PTR, OUTPUT_PTR, HEX_STRING, BYTE_COUNT) \
    157     do {                                                      \
    158         for (int i = 0; i < (BYTE_COUNT); ++i) {              \
    159             uint8_t value = *(INPUT_PTR)++;                   \
    160             *(OUTPUT_PTR)++ = (HEX_STRING)[value >> 4];       \
    161             *(OUTPUT_PTR)++ = (HEX_STRING)[value & 0xF];      \
    162         }                                                     \
    163     } while (false)
    164 static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) {
    165     //  8-4-4-4-12
    166     char buffer[36];  // [32 + 4]
    167     static const char gHex[] = "0123456789abcdef";
    168     SkASSERT(strlen(gHex) == 16);
    169     char* ptr = buffer;
    170     const uint8_t* data = uuid.fData;
    171     HEXIFY(data, ptr, gHex, 4);
    172     *ptr++ = '-';
    173     HEXIFY(data, ptr, gHex, 2);
    174     *ptr++ = '-';
    175     HEXIFY(data, ptr, gHex, 2);
    176     *ptr++ = '-';
    177     HEXIFY(data, ptr, gHex, 2);
    178     *ptr++ = '-';
    179     HEXIFY(data, ptr, gHex, 6);
    180     SkASSERT(ptr == buffer + 36);
    181     SkASSERT(data == uuid.fData + 16);
    182     return SkString(buffer, 36);
    183 }
    184 #undef HEXIFY
    185 
    186 namespace {
    187 class PDFXMLObject final : public SkPDFObject {
    188 public:
    189     PDFXMLObject(SkString xml) : fXML(std::move(xml)) {}
    190     void emitObject(SkWStream* stream,
    191                     const SkPDFObjNumMap& omap,
    192                     const SkPDFSubstituteMap& smap) const override {
    193         SkPDFDict dict("Metadata");
    194         dict.insertName("Subtype", "XML");
    195         dict.insertInt("Length", fXML.size());
    196         dict.emitObject(stream, omap, smap);
    197         static const char streamBegin[] = " stream\n";
    198         stream->write(streamBegin, strlen(streamBegin));
    199         // Do not compress this.  The standard requires that a
    200         // program that does not understand PDF can grep for
    201         // "<?xpacket" and extrac the entire XML.
    202         stream->write(fXML.c_str(), fXML.size());
    203         static const char streamEnd[] = "\nendstream";
    204         stream->write(streamEnd, strlen(streamEnd));
    205     }
    206 
    207 private:
    208     const SkString fXML;
    209 };
    210 }  // namespace
    211 
    212 static int count_xml_escape_size(const SkString& input) {
    213     int extra = 0;
    214     for (size_t i = 0; i < input.size(); ++i) {
    215         if (input[i] == '&') {
    216             extra += 4;  // strlen("&amp;") - strlen("&")
    217         } else if (input[i] == '<') {
    218             extra += 3;  // strlen("&lt;") - strlen("<")
    219         }
    220     }
    221     return extra;
    222 }
    223 
    224 const SkString escape_xml(const SkString& input,
    225                           const char* before = nullptr,
    226                           const char* after = nullptr) {
    227     if (input.size() == 0) {
    228         return input;
    229     }
    230     // "&" --> "&amp;" and  "<" --> "&lt;"
    231     // text is assumed to be in UTF-8
    232     // all strings are xml content, not attribute values.
    233     size_t beforeLen = before ? strlen(before) : 0;
    234     size_t afterLen = after ? strlen(after) : 0;
    235     int extra = count_xml_escape_size(input);
    236     SkString output(input.size() + extra + beforeLen + afterLen);
    237     char* out = output.writable_str();
    238     if (before) {
    239         strncpy(out, before, beforeLen);
    240         out += beforeLen;
    241     }
    242     static const char kAmp[] = "&amp;";
    243     static const char kLt[] = "&lt;";
    244     for (size_t i = 0; i < input.size(); ++i) {
    245         if (input[i] == '&') {
    246             strncpy(out, kAmp, strlen(kAmp));
    247             out += strlen(kAmp);
    248         } else if (input[i] == '<') {
    249             strncpy(out, kLt, strlen(kLt));
    250             out += strlen(kLt);
    251         } else {
    252             *out++ = input[i];
    253         }
    254     }
    255     if (after) {
    256         strncpy(out, after, afterLen);
    257         out += afterLen;
    258     }
    259     // Validate that we haven't written outside of our string.
    260     SkASSERT(out == &output.writable_str()[output.size()]);
    261     *out = '\0';
    262     return std::move(output);
    263 }
    264 
    265 SkPDFObject* SkPDFMetadata::createXMPObject(const UUID& doc,
    266                                             const UUID& instance) const {
    267     static const char templateString[] =
    268             "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
    269             "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
    270             " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
    271             "2012/08/23-13:03:03\">\n"
    272             "<rdf:RDF "
    273             "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
    274             "<rdf:Description rdf:about=\"\"\n"
    275             " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
    276             " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
    277             " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
    278             " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
    279             " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
    280             "<pdfaid:part>2</pdfaid:part>\n"
    281             "<pdfaid:conformance>B</pdfaid:conformance>\n"
    282             "%s"  // ModifyDate
    283             "%s"  // CreateDate
    284             "%s"  // MetadataDate
    285             "%s"  // xmp:CreatorTool
    286             "<dc:format>application/pdf</dc:format>\n"
    287             "%s"  // dc:title
    288             "%s"  // dc:description
    289             "%s"  // author
    290             "%s"  // keywords
    291             "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
    292             "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
    293             "<pdf:Producer>Skia/PDF</pdf:Producer>\n"
    294             "%s"  // pdf:Keywords
    295             "</rdf:Description>\n"
    296             "</rdf:RDF>\n"
    297             "</x:xmpmeta>\n"  // Note:  the standard suggests 4k of padding.
    298             "<?xpacket end=\"w\"?>\n";
    299 
    300     SkString creationDate;
    301     SkString modificationDate;
    302     SkString metadataDate;
    303     if (fCreation) {
    304         SkString tmp;
    305         fCreation->toISO8601(&tmp);
    306         SkASSERT(0 == count_xml_escape_size(tmp));
    307         // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
    308         creationDate = sk_string_printf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
    309                                         tmp.c_str());
    310     }
    311     if (fModified) {
    312         SkString tmp;
    313         fModified->toISO8601(&tmp);
    314         SkASSERT(0 == count_xml_escape_size(tmp));
    315         modificationDate = sk_string_printf(
    316                 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str());
    317         metadataDate = sk_string_printf(
    318                 "<xmp:MetadataDate>%s</xmp:MetadataDate>\n", tmp.c_str());
    319     }
    320 
    321     SkString title =
    322             escape_xml(get(fInfo, "Title"), "<dc:title><rdf:Alt><rdf:li>",
    323                        "</rdf:li></rdf:Alt></dc:title>\n");
    324     SkString author =
    325             escape_xml(get(fInfo, "Author"), "<dc:creator><rdf:Bag><rdf:li>",
    326                        "</rdf:li></rdf:Bag></dc:creator>\n");
    327     // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
    328     SkString subject = escape_xml(get(fInfo, "Subject"),
    329                                   "<dc:description><rdf:Alt><rdf:li>",
    330                                   "</rdf:li></rdf:Alt></dc:description>\n");
    331     SkString keywords1 =
    332             escape_xml(get(fInfo, "Keywords"), "<dc:subject><rdf:Bag><rdf:li>",
    333                        "</rdf:li></rdf:Bag></dc:subject>\n");
    334     SkString keywords2 = escape_xml(get(fInfo, "Keywords"), "<pdf:Keywords>",
    335                                     "</pdf:Keywords>\n");
    336 
    337     // TODO: in theory, keywords can be a list too.
    338     SkString creator = escape_xml(get(fInfo, "Creator"), "<xmp:CreatorTool>",
    339                                   "</xmp:CreatorTool>\n");
    340     SkString documentID = uuid_to_string(doc);  // no need to escape
    341     SkASSERT(0 == count_xml_escape_size(documentID));
    342     SkString instanceID = uuid_to_string(instance);
    343     SkASSERT(0 == count_xml_escape_size(instanceID));
    344     return new PDFXMLObject(sk_string_printf(
    345             templateString, modificationDate.c_str(), creationDate.c_str(),
    346             metadataDate.c_str(), creator.c_str(), title.c_str(),
    347             subject.c_str(), author.c_str(), keywords1.c_str(),
    348             documentID.c_str(), instanceID.c_str(), keywords2.c_str()));
    349 }
    350 
    351 #endif  // SK_PDF_GENERATE_PDFA
    352