Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2015 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkMD5.h"
      9 #include "SkMilestone.h"
     10 #include "SkPDFMetadata.h"
     11 #include "SkPDFTypes.h"
     12 #include "SkUtils.h"
     13 
     14 #include <utility>
     15 
// Two-step stringification: SKPDF_STRING(X) lets X be macro-expanded first,
// then SKPDF_STRING_IMPL turns the expanded tokens into a string literal.
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X
// Default producer string: "Skia/PDF m" followed by the stringified
// SK_MILESTONE value.
#define SKPDF_PRODUCER "Skia/PDF m" SKPDF_STRING(SK_MILESTONE)
// Key under which SKPDF_PRODUCER is recorded when the caller supplies a
// custom producer string of their own.
#define SKPDF_CUSTOM_PRODUCER_KEY "ProductionLibrary"
     20 
     21 static SkString pdf_date(const SkTime::DateTime& dt) {
     22     int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes);
     23     char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-';
     24     int timeZoneHours = SkTAbs(timeZoneMinutes) / 60;
     25     timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60;
     26     return SkStringPrintf(
     27             "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'",
     28             static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth),
     29             static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour),
     30             static_cast<unsigned>(dt.fMinute),
     31             static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours,
     32             timeZoneMinutes);
     33 }
     34 
     35 namespace {
     36 static const struct {
     37     const char* const key;
     38     SkString SkDocument::PDFMetadata::*const valuePtr;
     39 } gMetadataKeys[] = {
     40         {"Title", &SkDocument::PDFMetadata::fTitle},
     41         {"Author", &SkDocument::PDFMetadata::fAuthor},
     42         {"Subject", &SkDocument::PDFMetadata::fSubject},
     43         {"Keywords", &SkDocument::PDFMetadata::fKeywords},
     44         {"Creator", &SkDocument::PDFMetadata::fCreator},
     45 };
     46 }  // namespace
     47 
     48 sk_sp<SkPDFObject> SkPDFMetadata::MakeDocumentInformationDict(
     49         const SkDocument::PDFMetadata& metadata) {
     50     auto dict = sk_make_sp<SkPDFDict>();
     51     for (const auto keyValuePtr : gMetadataKeys) {
     52         const SkString& value = metadata.*(keyValuePtr.valuePtr);
     53         if (value.size() > 0) {
     54             dict->insertString(keyValuePtr.key, value);
     55         }
     56     }
     57     if (metadata.fProducer.isEmpty()) {
     58         dict->insertString("Producer", SKPDF_PRODUCER);
     59     } else {
     60         dict->insertString("Producer", metadata.fProducer);
     61         dict->insertString(SKPDF_CUSTOM_PRODUCER_KEY, SKPDF_PRODUCER);
     62     }
     63     if (metadata.fCreation.fEnabled) {
     64         dict->insertString("CreationDate",
     65                            pdf_date(metadata.fCreation.fDateTime));
     66     }
     67     if (metadata.fModified.fEnabled) {
     68         dict->insertString("ModDate", pdf_date(metadata.fModified.fDateTime));
     69     }
     70     return dict;
     71 }
     72 
     73 SkPDFMetadata::UUID SkPDFMetadata::CreateUUID(
     74         const SkDocument::PDFMetadata& metadata) {
     75     // The main requirement is for the UUID to be unique; the exact
     76     // format of the data that will be hashed is not important.
     77     SkMD5 md5;
     78     const char uuidNamespace[] = "org.skia.pdf\n";
     79     md5.write(uuidNamespace, strlen(uuidNamespace));
     80     double msec = SkTime::GetMSecs();
     81     md5.write(&msec, sizeof(msec));
     82     SkTime::DateTime dateTime;
     83     SkTime::GetDateTime(&dateTime);
     84     md5.write(&dateTime, sizeof(dateTime));
     85     if (metadata.fCreation.fEnabled) {
     86         md5.write(&metadata.fCreation.fDateTime,
     87                   sizeof(metadata.fCreation.fDateTime));
     88     }
     89     if (metadata.fModified.fEnabled) {
     90         md5.write(&metadata.fModified.fDateTime,
     91                   sizeof(metadata.fModified.fDateTime));
     92     }
     93 
     94     for (const auto keyValuePtr : gMetadataKeys) {
     95         md5.write(keyValuePtr.key, strlen(keyValuePtr.key));
     96         md5.write("\037", 1);
     97         const SkString& value = metadata.*(keyValuePtr.valuePtr);
     98         md5.write(value.c_str(), value.size());
     99         md5.write("\036", 1);
    100     }
    101     SkMD5::Digest digest;
    102     md5.finish(digest);
    103     // See RFC 4122, page 6-7.
    104     digest.data[6] = (digest.data[6] & 0x0F) | 0x30;
    105     digest.data[8] = (digest.data[6] & 0x3F) | 0x80;
    106     static_assert(sizeof(digest) == sizeof(UUID), "uuid_size");
    107     SkPDFMetadata::UUID uuid;
    108     memcpy(&uuid, &digest, sizeof(digest));
    109     return uuid;
    110 }
    111 
    112 sk_sp<SkPDFObject> SkPDFMetadata::MakePdfId(const UUID& doc,
    113                                             const UUID& instance) {
    114     // /ID [ <81b14aafa313db63dbd6f981e49f94f4>
    115     //       <81b14aafa313db63dbd6f981e49f94f4> ]
    116     auto array = sk_make_sp<SkPDFArray>();
    117     static_assert(sizeof(SkPDFMetadata::UUID) == 16, "uuid_size");
    118     array->appendString(
    119             SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID)));
    120     array->appendString(
    121             SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID)));
    122     return array;
    123 }
    124 
    125 // Convert a block of memory to hexadecimal.  Input and output pointers will be
    126 // moved to end of the range.
    127 static void hexify(const uint8_t** inputPtr, char** outputPtr, int count) {
    128     SkASSERT(inputPtr && *inputPtr);
    129     SkASSERT(outputPtr && *outputPtr);
    130     while (count-- > 0) {
    131         uint8_t value = *(*inputPtr)++;
    132         *(*outputPtr)++ = SkHexadecimalDigits::gLower[value >> 4];
    133         *(*outputPtr)++ = SkHexadecimalDigits::gLower[value & 0xF];
    134     }
    135 }
    136 
    137 static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) {
    138     //  8-4-4-4-12
    139     char buffer[36];  // [32 + 4]
    140     char* ptr = buffer;
    141     const uint8_t* data = uuid.fData;
    142     hexify(&data, &ptr, 4);
    143     *ptr++ = '-';
    144     hexify(&data, &ptr, 2);
    145     *ptr++ = '-';
    146     hexify(&data, &ptr, 2);
    147     *ptr++ = '-';
    148     hexify(&data, &ptr, 2);
    149     *ptr++ = '-';
    150     hexify(&data, &ptr, 6);
    151     SkASSERT(ptr == buffer + 36);
    152     SkASSERT(data == uuid.fData + 16);
    153     return SkString(buffer, 36);
    154 }
    155 
    156 namespace {
    157 class PDFXMLObject final : public SkPDFObject {
    158 public:
    159     PDFXMLObject(SkString xml) : fXML(std::move(xml)) {}
    160     void emitObject(SkWStream* stream,
    161                     const SkPDFObjNumMap& omap) const override {
    162         SkPDFDict dict("Metadata");
    163         dict.insertName("Subtype", "XML");
    164         dict.insertInt("Length", fXML.size());
    165         dict.emitObject(stream, omap);
    166         static const char streamBegin[] = " stream\n";
    167         stream->write(streamBegin, strlen(streamBegin));
    168         // Do not compress this.  The standard requires that a
    169         // program that does not understand PDF can grep for
    170         // "<?xpacket" and extract the entire XML.
    171         stream->write(fXML.c_str(), fXML.size());
    172         static const char streamEnd[] = "\nendstream";
    173         stream->write(streamEnd, strlen(streamEnd));
    174     }
    175 
    176 private:
    177     const SkString fXML;
    178 };
    179 }  // namespace
    180 
    181 static int count_xml_escape_size(const SkString& input) {
    182     int extra = 0;
    183     for (size_t i = 0; i < input.size(); ++i) {
    184         if (input[i] == '&') {
    185             extra += 4;  // strlen("&amp;") - strlen("&")
    186         } else if (input[i] == '<') {
    187             extra += 3;  // strlen("&lt;") - strlen("<")
    188         }
    189     }
    190     return extra;
    191 }
    192 
    193 const SkString escape_xml(const SkString& input,
    194                           const char* before = nullptr,
    195                           const char* after = nullptr) {
    196     if (input.size() == 0) {
    197         return input;
    198     }
    199     // "&" --> "&amp;" and  "<" --> "&lt;"
    200     // text is assumed to be in UTF-8
    201     // all strings are xml content, not attribute values.
    202     size_t beforeLen = before ? strlen(before) : 0;
    203     size_t afterLen = after ? strlen(after) : 0;
    204     int extra = count_xml_escape_size(input);
    205     SkString output(input.size() + extra + beforeLen + afterLen);
    206     char* out = output.writable_str();
    207     if (before) {
    208         strncpy(out, before, beforeLen);
    209         out += beforeLen;
    210     }
    211     static const char kAmp[] = "&amp;";
    212     static const char kLt[] = "&lt;";
    213     for (size_t i = 0; i < input.size(); ++i) {
    214         if (input[i] == '&') {
    215             strncpy(out, kAmp, strlen(kAmp));
    216             out += strlen(kAmp);
    217         } else if (input[i] == '<') {
    218             strncpy(out, kLt, strlen(kLt));
    219             out += strlen(kLt);
    220         } else {
    221             *out++ = input[i];
    222         }
    223     }
    224     if (after) {
    225         strncpy(out, after, afterLen);
    226         out += afterLen;
    227     }
    228     // Validate that we haven't written outside of our string.
    229     SkASSERT(out == &output.writable_str()[output.size()]);
    230     *out = '\0';
    231     return output;
    232 }
    233 
    234 sk_sp<SkPDFObject> SkPDFMetadata::MakeXMPObject(
    235         const SkDocument::PDFMetadata& metadata,
    236         const UUID& doc,
    237         const UUID& instance) {
    238     static const char templateString[] =
    239             "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
    240             "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
    241             " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
    242             "2012/08/23-13:03:03\">\n"
    243             "<rdf:RDF "
    244             "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
    245             "<rdf:Description rdf:about=\"\"\n"
    246             " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
    247             " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
    248             " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
    249             " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
    250             " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
    251             "<pdfaid:part>2</pdfaid:part>\n"
    252             "<pdfaid:conformance>B</pdfaid:conformance>\n"
    253             "%s"  // ModifyDate
    254             "%s"  // CreateDate
    255             "%s"  // xmp:CreatorTool
    256             "<dc:format>application/pdf</dc:format>\n"
    257             "%s"  // dc:title
    258             "%s"  // dc:description
    259             "%s"  // author
    260             "%s"  // keywords
    261             "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
    262             "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
    263             "%s"  // pdf:Producer
    264             "%s"  // pdf:Keywords
    265             "</rdf:Description>\n"
    266             "</rdf:RDF>\n"
    267             "</x:xmpmeta>\n"  // Note:  the standard suggests 4k of padding.
    268             "<?xpacket end=\"w\"?>\n";
    269 
    270     SkString creationDate;
    271     SkString modificationDate;
    272     if (metadata.fCreation.fEnabled) {
    273         SkString tmp;
    274         metadata.fCreation.fDateTime.toISO8601(&tmp);
    275         SkASSERT(0 == count_xml_escape_size(tmp));
    276         // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
    277         creationDate = SkStringPrintf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
    278                                       tmp.c_str());
    279     }
    280     if (metadata.fModified.fEnabled) {
    281         SkString tmp;
    282         metadata.fModified.fDateTime.toISO8601(&tmp);
    283         SkASSERT(0 == count_xml_escape_size(tmp));
    284         modificationDate = SkStringPrintf(
    285                 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str());
    286     }
    287     SkString title =
    288             escape_xml(metadata.fTitle,
    289                        "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">",
    290                        "</rdf:li></rdf:Alt></dc:title>\n");
    291     SkString author =
    292             escape_xml(metadata.fAuthor, "<dc:creator><rdf:Bag><rdf:li>",
    293                        "</rdf:li></rdf:Bag></dc:creator>\n");
    294     // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
    295     SkString subject = escape_xml(
    296             metadata.fSubject,
    297             "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">",
    298             "</rdf:li></rdf:Alt></dc:description>\n");
    299     SkString keywords1 =
    300             escape_xml(metadata.fKeywords, "<dc:subject><rdf:Bag><rdf:li>",
    301                        "</rdf:li></rdf:Bag></dc:subject>\n");
    302     SkString keywords2 = escape_xml(metadata.fKeywords, "<pdf:Keywords>",
    303                                     "</pdf:Keywords>\n");
    304     // TODO: in theory, keywords can be a list too.
    305 
    306     SkString producer("<pdf:Producer>" SKPDF_PRODUCER "</pdf:Producer>\n");
    307     if (!metadata.fProducer.isEmpty()) {
    308         // TODO: register a developer prefix to make
    309         // <skia:SKPDF_CUSTOM_PRODUCER_KEY> a real XML tag.
    310         producer = escape_xml(
    311                 metadata.fProducer, "<pdf:Producer>",
    312                 "</pdf:Producer>\n<!-- <skia:" SKPDF_CUSTOM_PRODUCER_KEY ">"
    313                 SKPDF_PRODUCER "</skia:" SKPDF_CUSTOM_PRODUCER_KEY "> -->\n");
    314     }
    315 
    316     SkString creator = escape_xml(metadata.fCreator, "<xmp:CreatorTool>",
    317                                   "</xmp:CreatorTool>\n");
    318     SkString documentID = uuid_to_string(doc);  // no need to escape
    319     SkASSERT(0 == count_xml_escape_size(documentID));
    320     SkString instanceID = uuid_to_string(instance);
    321     SkASSERT(0 == count_xml_escape_size(instanceID));
    322     return sk_make_sp<PDFXMLObject>(SkStringPrintf(
    323             templateString, modificationDate.c_str(), creationDate.c_str(),
    324             creator.c_str(), title.c_str(), subject.c_str(), author.c_str(),
    325             keywords1.c_str(), documentID.c_str(), instanceID.c_str(),
    326             producer.c_str(), keywords2.c_str()));
    327 }
    328 
// Remove the SKPDF_* helper macros again so that they do not remain defined
// past the end of this file.
#undef SKPDF_CUSTOM_PRODUCER_KEY
#undef SKPDF_PRODUCER
#undef SKPDF_STRING
#undef SKPDF_STRING_IMPL
    333