Home | History | Annotate | Download | only in pdf
      1 /*
      2  * Copyright 2015 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkMD5.h"
      9 #include "SkMilestone.h"
     10 #include "SkPDFMetadata.h"
     11 #include "SkPDFTypes.h"
     12 #include <utility>
     13 
// Default value written for the "Producer" metadata entry.  It is built by
// literal concatenation with SK_MILESTONE (presumably a string literal
// supplied by SkMilestone.h -- confirm).
#define SKPDF_PRODUCER "Skia/PDF [" SK_MILESTONE "]"
// Key under which SKPDF_PRODUCER is still recorded when the caller supplies
// a custom producer string of their own.
#define SKPDF_CUSTOM_PRODUCER_KEY "ProductionLibrary"
     16 
     17 static SkString pdf_date(const SkTime::DateTime& dt) {
     18     int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes);
     19     char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-';
     20     int timeZoneHours = SkTAbs(timeZoneMinutes) / 60;
     21     timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60;
     22     return SkStringPrintf(
     23             "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'",
     24             static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth),
     25             static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour),
     26             static_cast<unsigned>(dt.fMinute),
     27             static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours,
     28             timeZoneMinutes);
     29 }
     30 
     31 namespace {
     32 static const struct {
     33     const char* const key;
     34     SkString SkDocument::PDFMetadata::*const valuePtr;
     35 } gMetadataKeys[] = {
     36         {"Title", &SkDocument::PDFMetadata::fTitle},
     37         {"Author", &SkDocument::PDFMetadata::fAuthor},
     38         {"Subject", &SkDocument::PDFMetadata::fSubject},
     39         {"Keywords", &SkDocument::PDFMetadata::fKeywords},
     40         {"Creator", &SkDocument::PDFMetadata::fCreator},
     41 };
     42 }  // namespace
     43 
     44 sk_sp<SkPDFObject> SkPDFMetadata::MakeDocumentInformationDict(
     45         const SkDocument::PDFMetadata& metadata) {
     46     auto dict = sk_make_sp<SkPDFDict>();
     47     for (const auto keyValuePtr : gMetadataKeys) {
     48         const SkString& value = metadata.*(keyValuePtr.valuePtr);
     49         if (value.size() > 0) {
     50             dict->insertString(keyValuePtr.key, value);
     51         }
     52     }
     53     if (metadata.fProducer.isEmpty()) {
     54         dict->insertString("Producer", SKPDF_PRODUCER);
     55     } else {
     56         dict->insertString("Producer", metadata.fProducer);
     57         dict->insertString(SKPDF_CUSTOM_PRODUCER_KEY, SKPDF_PRODUCER);
     58     }
     59     if (metadata.fCreation.fEnabled) {
     60         dict->insertString("CreationDate",
     61                            pdf_date(metadata.fCreation.fDateTime));
     62     }
     63     if (metadata.fModified.fEnabled) {
     64         dict->insertString("ModDate", pdf_date(metadata.fModified.fDateTime));
     65     }
     66     return dict;
     67 }
     68 
     69 SkPDFMetadata::UUID SkPDFMetadata::CreateUUID(
     70         const SkDocument::PDFMetadata& metadata) {
     71     // The main requirement is for the UUID to be unique; the exact
     72     // format of the data that will be hashed is not important.
     73     SkMD5 md5;
     74     const char uuidNamespace[] = "org.skia.pdf\n";
     75     md5.write(uuidNamespace, strlen(uuidNamespace));
     76     double msec = SkTime::GetMSecs();
     77     md5.write(&msec, sizeof(msec));
     78     SkTime::DateTime dateTime;
     79     SkTime::GetDateTime(&dateTime);
     80     md5.write(&dateTime, sizeof(dateTime));
     81     if (metadata.fCreation.fEnabled) {
     82         md5.write(&metadata.fCreation.fDateTime,
     83                   sizeof(metadata.fCreation.fDateTime));
     84     }
     85     if (metadata.fModified.fEnabled) {
     86         md5.write(&metadata.fModified.fDateTime,
     87                   sizeof(metadata.fModified.fDateTime));
     88     }
     89 
     90     for (const auto keyValuePtr : gMetadataKeys) {
     91         md5.write(keyValuePtr.key, strlen(keyValuePtr.key));
     92         md5.write("\037", 1);
     93         const SkString& value = metadata.*(keyValuePtr.valuePtr);
     94         md5.write(value.c_str(), value.size());
     95         md5.write("\036", 1);
     96     }
     97     SkMD5::Digest digest;
     98     md5.finish(digest);
     99     // See RFC 4122, page 6-7.
    100     digest.data[6] = (digest.data[6] & 0x0F) | 0x30;
    101     digest.data[8] = (digest.data[6] & 0x3F) | 0x80;
    102     static_assert(sizeof(digest) == sizeof(UUID), "uuid_size");
    103     SkPDFMetadata::UUID uuid;
    104     memcpy(&uuid, &digest, sizeof(digest));
    105     return uuid;
    106 }
    107 
    108 sk_sp<SkPDFObject> SkPDFMetadata::MakePdfId(const UUID& doc,
    109                                             const UUID& instance) {
    110     // /ID [ <81b14aafa313db63dbd6f981e49f94f4>
    111     //       <81b14aafa313db63dbd6f981e49f94f4> ]
    112     auto array = sk_make_sp<SkPDFArray>();
    113     static_assert(sizeof(SkPDFMetadata::UUID) == 16, "uuid_size");
    114     array->appendString(
    115             SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID)));
    116     array->appendString(
    117             SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID)));
    118     return array;
    119 }
    120 
    121 #define HEXIFY(INPUT_PTR, OUTPUT_PTR, HEX_STRING, BYTE_COUNT) \
    122     do {                                                      \
    123         for (int i = 0; i < (BYTE_COUNT); ++i) {              \
    124             uint8_t value = *(INPUT_PTR)++;                   \
    125             *(OUTPUT_PTR)++ = (HEX_STRING)[value >> 4];       \
    126             *(OUTPUT_PTR)++ = (HEX_STRING)[value & 0xF];      \
    127         }                                                     \
    128     } while (false)
    129 static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) {
    130     //  8-4-4-4-12
    131     char buffer[36];  // [32 + 4]
    132     static const char gHex[] = "0123456789abcdef";
    133     SkASSERT(strlen(gHex) == 16);
    134     char* ptr = buffer;
    135     const uint8_t* data = uuid.fData;
    136     HEXIFY(data, ptr, gHex, 4);
    137     *ptr++ = '-';
    138     HEXIFY(data, ptr, gHex, 2);
    139     *ptr++ = '-';
    140     HEXIFY(data, ptr, gHex, 2);
    141     *ptr++ = '-';
    142     HEXIFY(data, ptr, gHex, 2);
    143     *ptr++ = '-';
    144     HEXIFY(data, ptr, gHex, 6);
    145     SkASSERT(ptr == buffer + 36);
    146     SkASSERT(data == uuid.fData + 16);
    147     return SkString(buffer, 36);
    148 }
    149 #undef HEXIFY
    150 
    151 namespace {
    152 class PDFXMLObject final : public SkPDFObject {
    153 public:
    154     PDFXMLObject(SkString xml) : fXML(std::move(xml)) {}
    155     void emitObject(SkWStream* stream,
    156                     const SkPDFObjNumMap& omap) const override {
    157         SkPDFDict dict("Metadata");
    158         dict.insertName("Subtype", "XML");
    159         dict.insertInt("Length", fXML.size());
    160         dict.emitObject(stream, omap);
    161         static const char streamBegin[] = " stream\n";
    162         stream->write(streamBegin, strlen(streamBegin));
    163         // Do not compress this.  The standard requires that a
    164         // program that does not understand PDF can grep for
    165         // "<?xpacket" and extract the entire XML.
    166         stream->write(fXML.c_str(), fXML.size());
    167         static const char streamEnd[] = "\nendstream";
    168         stream->write(streamEnd, strlen(streamEnd));
    169     }
    170 
    171 private:
    172     const SkString fXML;
    173 };
    174 }  // namespace
    175 
    176 static int count_xml_escape_size(const SkString& input) {
    177     int extra = 0;
    178     for (size_t i = 0; i < input.size(); ++i) {
    179         if (input[i] == '&') {
    180             extra += 4;  // strlen("&amp;") - strlen("&")
    181         } else if (input[i] == '<') {
    182             extra += 3;  // strlen("&lt;") - strlen("<")
    183         }
    184     }
    185     return extra;
    186 }
    187 
    188 const SkString escape_xml(const SkString& input,
    189                           const char* before = nullptr,
    190                           const char* after = nullptr) {
    191     if (input.size() == 0) {
    192         return input;
    193     }
    194     // "&" --> "&amp;" and  "<" --> "&lt;"
    195     // text is assumed to be in UTF-8
    196     // all strings are xml content, not attribute values.
    197     size_t beforeLen = before ? strlen(before) : 0;
    198     size_t afterLen = after ? strlen(after) : 0;
    199     int extra = count_xml_escape_size(input);
    200     SkString output(input.size() + extra + beforeLen + afterLen);
    201     char* out = output.writable_str();
    202     if (before) {
    203         strncpy(out, before, beforeLen);
    204         out += beforeLen;
    205     }
    206     static const char kAmp[] = "&amp;";
    207     static const char kLt[] = "&lt;";
    208     for (size_t i = 0; i < input.size(); ++i) {
    209         if (input[i] == '&') {
    210             strncpy(out, kAmp, strlen(kAmp));
    211             out += strlen(kAmp);
    212         } else if (input[i] == '<') {
    213             strncpy(out, kLt, strlen(kLt));
    214             out += strlen(kLt);
    215         } else {
    216             *out++ = input[i];
    217         }
    218     }
    219     if (after) {
    220         strncpy(out, after, afterLen);
    221         out += afterLen;
    222     }
    223     // Validate that we haven't written outside of our string.
    224     SkASSERT(out == &output.writable_str()[output.size()]);
    225     *out = '\0';
    226     return output;
    227 }
    228 
    229 sk_sp<SkPDFObject> SkPDFMetadata::MakeXMPObject(
    230         const SkDocument::PDFMetadata& metadata,
    231         const UUID& doc,
    232         const UUID& instance) {
    233     static const char templateString[] =
    234             "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
    235             "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
    236             " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
    237             "2012/08/23-13:03:03\">\n"
    238             "<rdf:RDF "
    239             "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
    240             "<rdf:Description rdf:about=\"\"\n"
    241             " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
    242             " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
    243             " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
    244             " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
    245             " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
    246             "<pdfaid:part>2</pdfaid:part>\n"
    247             "<pdfaid:conformance>B</pdfaid:conformance>\n"
    248             "%s"  // ModifyDate
    249             "%s"  // CreateDate
    250             "%s"  // xmp:CreatorTool
    251             "<dc:format>application/pdf</dc:format>\n"
    252             "%s"  // dc:title
    253             "%s"  // dc:description
    254             "%s"  // author
    255             "%s"  // keywords
    256             "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
    257             "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
    258             "%s"  // pdf:Producer
    259             "%s"  // pdf:Keywords
    260             "</rdf:Description>\n"
    261             "</rdf:RDF>\n"
    262             "</x:xmpmeta>\n"  // Note:  the standard suggests 4k of padding.
    263             "<?xpacket end=\"w\"?>\n";
    264 
    265     SkString creationDate;
    266     SkString modificationDate;
    267     if (metadata.fCreation.fEnabled) {
    268         SkString tmp;
    269         metadata.fCreation.fDateTime.toISO8601(&tmp);
    270         SkASSERT(0 == count_xml_escape_size(tmp));
    271         // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
    272         creationDate = SkStringPrintf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
    273                                       tmp.c_str());
    274     }
    275     if (metadata.fModified.fEnabled) {
    276         SkString tmp;
    277         metadata.fModified.fDateTime.toISO8601(&tmp);
    278         SkASSERT(0 == count_xml_escape_size(tmp));
    279         modificationDate = SkStringPrintf(
    280                 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str());
    281     }
    282     SkString title =
    283             escape_xml(metadata.fTitle,
    284                        "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">",
    285                        "</rdf:li></rdf:Alt></dc:title>\n");
    286     SkString author =
    287             escape_xml(metadata.fAuthor, "<dc:creator><rdf:Bag><rdf:li>",
    288                        "</rdf:li></rdf:Bag></dc:creator>\n");
    289     // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
    290     SkString subject = escape_xml(
    291             metadata.fSubject,
    292             "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">",
    293             "</rdf:li></rdf:Alt></dc:description>\n");
    294     SkString keywords1 =
    295             escape_xml(metadata.fKeywords, "<dc:subject><rdf:Bag><rdf:li>",
    296                        "</rdf:li></rdf:Bag></dc:subject>\n");
    297     SkString keywords2 = escape_xml(metadata.fKeywords, "<pdf:Keywords>",
    298                                     "</pdf:Keywords>\n");
    299     // TODO: in theory, keywords can be a list too.
    300 
    301     SkString producer("<pdf:Producer>" SKPDF_PRODUCER "</pdf:Producer>\n");
    302     if (!metadata.fProducer.isEmpty()) {
    303         // TODO: register a developer prefix to make
    304         // <skia:SKPDF_CUSTOM_PRODUCER_KEY> a real XML tag.
    305         producer = escape_xml(
    306                 metadata.fProducer, "<pdf:Producer>",
    307                 "</pdf:Producer>\n<!-- <skia:" SKPDF_CUSTOM_PRODUCER_KEY ">"
    308                 SKPDF_PRODUCER "</skia:" SKPDF_CUSTOM_PRODUCER_KEY "> -->\n");
    309     }
    310 
    311     SkString creator = escape_xml(metadata.fCreator, "<xmp:CreatorTool>",
    312                                   "</xmp:CreatorTool>\n");
    313     SkString documentID = uuid_to_string(doc);  // no need to escape
    314     SkASSERT(0 == count_xml_escape_size(documentID));
    315     SkString instanceID = uuid_to_string(instance);
    316     SkASSERT(0 == count_xml_escape_size(instanceID));
    317     return sk_make_sp<PDFXMLObject>(SkStringPrintf(
    318             templateString, modificationDate.c_str(), creationDate.c_str(),
    319             creator.c_str(), title.c_str(), subject.c_str(), author.c_str(),
    320             keywords1.c_str(), documentID.c_str(), instanceID.c_str(),
    321             producer.c_str(), keywords2.c_str()));
    322 }
    323 
// Remove the macros this file defined for its own use.
#undef SKPDF_CUSTOM_PRODUCER_KEY
#undef SKPDF_PRODUCER
// NOTE(review): no #define for the two names below is visible in this file;
// these look like leftovers -- confirm before removing.
#undef SKPDF_STRING
#undef SKPDF_STRING_IMPL
    328