Home | History | Annotate | Download | only in xmpmeta
#include "xmpmeta/xmp_parser.h"

#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <sstream>
#include <stack>

#include "android-base/logging.h"
#include "strings/case.h"
#include "strings/numbers.h"
#include "xmpmeta/base64.h"
#include "xmpmeta/jpeg_io.h"
#include "xmpmeta/xml/const.h"
#include "xmpmeta/xml/deserializer_impl.h"
#include "xmpmeta/xml/search.h"
#include "xmpmeta/xml/utils.h"
#include "xmpmeta/xmp_const.h"
     18 
     19 using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch;
     20 using ::dynamic_depth::xmpmeta::xml::DeserializerImpl;
     21 using ::dynamic_depth::xmpmeta::xml::FromXmlChar;
     22 using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement;
     23 
     24 namespace dynamic_depth {
     25 namespace xmpmeta {
     26 namespace {
     27 
     28 const char kJpgExtension[] = "jpg";
     29 const char kJpegExtension[] = "jpeg";
     30 
     31 bool BoolStringToBool(const string& bool_str, bool* value) {
     32   if (dynamic_depth::StringCaseEqual(bool_str, "true")) {
     33     *value = true;
     34     return true;
     35   }
     36   if (dynamic_depth::StringCaseEqual(bool_str, "false")) {
     37     *value = false;
     38     return true;
     39   }
     40   return false;
     41 }
     42 
// Converts string_property to the type T.
// This is only the forward declaration; the primary template and its
// specializations (bool, double, int, int64) are defined later in this file.
// The result is written through |value|; the bool return reports whether the
// conversion succeeded.
template <typename T>
bool ConvertStringPropertyToType(const string& string_property, T* value);
     46 
     47 // Gets the end of the XMP meta content. If there is no packet wrapper, returns
     48 // data.length, otherwise returns 1 + the position of last '>' without '?'
     49 // before it. Usually the packet wrapper end is "<?xpacket end="w"?>.
     50 size_t GetXmpContentEnd(const string& data) {
     51   if (data.empty()) {
     52     return 0;
     53   }
     54   for (size_t i = data.size() - 1; i >= 1; --i) {
     55     if (data[i] == '>') {
     56       if (data[i - 1] != '?') {
     57         return i + 1;
     58       }
     59     }
     60   }
     61   // It should not reach here for a valid XMP meta.
     62   LOG(WARNING) << "Failed to find the end of the XMP meta content.";
     63   return data.size();
     64 }
     65 
     66 // True if 's' starts with substring 'x'.
     67 bool StartsWith(const string& s, const string& x) {
     68   return s.size() >= x.size() && !s.compare(0, x.size(), x);
     69 }
     70 // True if 's' ends with substring 'x'.
     71 bool EndsWith(const string& s, const string& x) {
     72   return s.size() >= x.size() && !s.compare(s.size() - x.size(), x.size(), x);
     73 }
     74 
     75 // Parses the first valid XMP section. Any other valid XMP section will be
     76 // ignored.
     77 bool ParseFirstValidXMPSection(const std::vector<Section>& sections,
     78                                XmpData* xmp) {
     79   for (const Section& section : sections) {
     80     if (StartsWith(section.data, XmpConst::Header())) {
     81       const size_t end = GetXmpContentEnd(section.data);
     82       // Increment header length by 1 for the null termination.
     83       const size_t header_length = strlen(XmpConst::Header()) + 1;
     84       // Check for integer underflow before subtracting.
     85       if (header_length >= end) {
     86         LOG(ERROR) << "Invalid content length: "
     87                    << static_cast<int>(end - header_length);
     88         return false;
     89       }
     90       const size_t content_length = end - header_length;
     91       // header_length is guaranteed to be <= data.size due to the if condition
     92       // above. If this contract changes we must add an additonal check.
     93       const char* content_start = &section.data[header_length];
     94       // xmlReadMemory requires an int. Before casting size_t to int we must
     95       // check for integer overflow.
     96       if (content_length > INT_MAX) {
     97         LOG(ERROR) << "First XMP section too large, size: " << content_length;
     98         return false;
     99       }
    100       *xmp->MutableStandardSection() = xmlReadMemory(
    101           content_start, static_cast<int>(content_length), nullptr, nullptr, 0);
    102       if (xmp->StandardSection() == nullptr) {
    103         LOG(WARNING) << "Failed to parse standard section.";
    104         return false;
    105       }
    106       return true;
    107     }
    108   }
    109   return false;
    110 }
    111 
    112 // Collects the extended XMP sections with the given name into a string. Other
    113 // sections will be ignored.
    114 string GetExtendedXmpSections(const std::vector<Section>& sections,
    115                               const string& section_name) {
    116   string extended_header = XmpConst::ExtensionHeader();
    117   extended_header += '\0' + section_name;
    118   // section_name is dynamically extracted from the xml file and can have an
    119   // arbitrary size. Check for integer overflow before addition.
    120   if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) {
    121     return "";
    122   }
    123   const size_t section_start_offset =
    124       extended_header.size() + XmpConst::ExtensionHeaderOffset();
    125 
    126   // Compute the size of the buffer to parse the extended sections.
    127   std::vector<const Section*> xmp_sections;
    128   std::vector<size_t> xmp_end_offsets;
    129   size_t buffer_size = 0;
    130   for (const Section& section : sections) {
    131     if (extended_header.empty() || StartsWith(section.data, extended_header)) {
    132       const size_t end_offset = section.data.size();
    133       const size_t section_size = end_offset - section_start_offset;
    134       if (end_offset < section_start_offset ||
    135           section_size > SIZE_MAX - buffer_size) {
    136         return "";
    137       }
    138       buffer_size += section_size;
    139       xmp_sections.push_back(&section);
    140       xmp_end_offsets.push_back(end_offset);
    141     }
    142   }
    143 
    144   // Copy all the relevant sections' data into a buffer.
    145   string buffer(buffer_size, '\0');
    146   if (buffer.size() != buffer_size) {
    147     return "";
    148   }
    149   size_t offset = 0;
    150   for (int i = 0; i < xmp_sections.size(); ++i) {
    151     const Section* section = xmp_sections[i];
    152     const size_t length = xmp_end_offsets[i] - section_start_offset;
    153     std::copy_n(&section->data[section_start_offset], length, &buffer[offset]);
    154     offset += length;
    155   }
    156   return buffer;
    157 }
    158 
    159 // Parses the extended XMP sections with the given name. All other sections
    160 // will be ignored.
    161 bool ParseExtendedXmpSections(const std::vector<Section>& sections,
    162                               const string& section_name, XmpData* xmp_data) {
    163   const string extended_sections =
    164       GetExtendedXmpSections(sections, section_name);
    165   // xmlReadMemory requires an int. Before casting size_t to int we must check
    166   // for integer overflow.
    167   if (extended_sections.size() > INT_MAX) {
    168     LOG(WARNING) << "Extended sections too large, size: "
    169                  << extended_sections.size();
    170     return false;
    171   }
    172   *xmp_data->MutableExtendedSection() = xmlReadMemory(
    173       extended_sections.data(), static_cast<int>(extended_sections.size()),
    174       nullptr, nullptr, XML_PARSE_HUGE);
    175   if (xmp_data->ExtendedSection() == nullptr) {
    176     LOG(WARNING) << "Failed to parse extended sections.";
    177     return false;
    178   }
    179   return true;
    180 }
    181 
    182 // Extracts a XmpData from a JPEG image stream.
    183 bool ExtractXmpMeta(const bool skip_extended, std::istream* file,
    184                     XmpData* xmp_data) {
    185   // We cannot use CHECK because this is ported to AOSP.
    186   assert(xmp_data != nullptr);  // NOLINT
    187   xmp_data->Reset();
    188 
    189   ParseOptions parse_options;
    190   parse_options.read_meta_only = true;
    191   if (skip_extended) {
    192     parse_options.section_header = XmpConst::Header();
    193     parse_options.section_header_return_first = true;
    194   }
    195   const std::vector<Section> sections = Parse(parse_options, file);
    196   if (sections.empty()) {
    197     LOG(WARNING) << "No sections found.";
    198     return false;
    199   }
    200 
    201   if (!ParseFirstValidXMPSection(sections, xmp_data)) {
    202     LOG(WARNING) << "Could not parse first section.";
    203     return false;
    204   }
    205   if (skip_extended) {
    206     return true;
    207   }
    208   string extension_name;
    209   DeserializerImpl deserializer(
    210       GetFirstDescriptionElement(xmp_data->StandardSection()));
    211   if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(),
    212                                 XmpConst::HasExtension(), &extension_name)) {
    213     // No extended sections present, so nothing to parse.
    214     return true;
    215   }
    216   if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) {
    217     LOG(WARNING) << "Extended sections present, but could not be parsed.";
    218     return false;
    219   }
    220   return true;
    221 }
    222 
    223 // Extracts the specified string attribute.
    224 bool GetStringProperty(const xmlNodePtr node, const char* prefix,
    225                        const char* property, string* value) {
    226   const xmlDocPtr doc = node->doc;
    227   for (const _xmlAttr* attribute = node->properties; attribute != nullptr;
    228        attribute = attribute->next) {
    229     if (attribute->ns &&
    230         strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 &&
    231         strcmp(FromXmlChar(attribute->name), property) == 0) {
    232       xmlChar* attribute_string =
    233           xmlNodeListGetString(doc, attribute->children, 1);
    234       *value = FromXmlChar(attribute_string);
    235       xmlFree(attribute_string);
    236       return true;
    237     }
    238   }
    239   return false;
    240 }
    241 
    242 // Reads the contents of a node.
    243 // E.g. <prefix:node_name>Contents Here</prefix:node_name>
    244 bool ReadNodeContent(const xmlNodePtr node, const char* prefix,
    245                      const char* node_name, string* value) {
    246   auto* element = DepthFirstSearch(node, node_name);
    247   if (element == nullptr) {
    248     return false;
    249   }
    250   if (prefix != nullptr &&
    251       (element->ns == nullptr || element->ns->prefix == nullptr ||
    252        strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) {
    253     return false;
    254   }
    255   xmlChar* node_content = xmlNodeGetContent(element);
    256   *value = FromXmlChar(node_content);
    257   free(node_content);
    258   return true;
    259 }
    260 
// Primary template, used for every T without an explicit specialization below.
// It performs no conversion: the QCHECK fires when |value| is null, and
// otherwise the function simply returns false without touching *value.
template <typename T>
bool ConvertStringPropertyToType(const string& string_property, T* value) {
  QCHECK(value) << "Cannot call this method on a generic type";
  return false;
}
    266 
// bool specialization: delegates to BoolStringToBool, so it accepts "true" and
// "false" in any letter case and returns false for every other string.
template <>
bool ConvertStringPropertyToType<bool>(const string& string_property,
                                       bool* value) {
  return BoolStringToBool(string_property, value);
}
    272 
    273 template <>
    274 bool ConvertStringPropertyToType<double>(const string& string_property,
    275                                          double* value) {
    276   *value = std::stod(string_property);
    277   return true;
    278 }
    279 
    280 template <>
    281 bool ConvertStringPropertyToType<int>(const string& string_property,
    282                                       int* value) {
    283   *value = 0;
    284   for (int i = 0; i < string_property.size(); ++i) {
    285     if (!isdigit(string_property[i])) {
    286       return false;
    287     }
    288   }
    289 
    290   *value = std::atoi(string_property.c_str());  // NOLINT
    291   return true;
    292 }
    293 
    294 template <>
    295 bool ConvertStringPropertyToType<int64>(const string& string_property,
    296                                         int64* value) {
    297   *value = std::stol(string_property);
    298   return true;
    299 }
    300 
    301 }  // namespace
    302 
    303 bool ReadXmpHeader(const string& filename, const bool skip_extended,
    304                    XmpData* xmp_data) {
    305   string filename_lower = filename;
    306   std::transform(filename_lower.begin(), filename_lower.end(),
    307                  filename_lower.begin(), ::tolower);
    308   if (!EndsWith(filename_lower, kJpgExtension) &&
    309       !EndsWith(filename_lower, kJpegExtension)) {
    310     LOG(WARNING) << "XMP parse: only JPEG file is supported";
    311     return false;
    312   }
    313 
    314   std::ifstream file(filename.c_str(), std::ios::binary);
    315   if (!file.is_open()) {
    316     LOG(WARNING) << " Could not read file: " << filename;
    317     return false;
    318   }
    319   return ExtractXmpMeta(skip_extended, &file, xmp_data);
    320 }
    321 
    322 bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended,
    323                        XmpData* xmp_data) {
    324   std::istringstream stream(jpeg_contents);
    325   return ExtractXmpMeta(skip_extended, &stream, xmp_data);
    326 }
    327 
// Reads the XMP metadata from an already opened JPEG stream. Unlike the
// file name overload, no extension check is performed; the stream is passed
// straight to ExtractXmpMeta and its result is returned. When |skip_extended|
// is true only the standard XMP section is parsed.
bool ReadXmpHeader(std::istream* input_stream, bool skip_extended,
                   XmpData* xmp_data) {
  return ExtractXmpMeta(skip_extended, input_stream, xmp_data);
}
    332 
    333 }  // namespace xmpmeta
    334 }  // namespace dynamic_depth
    335