1 #include "xmpmeta/xmp_parser.h" 2 3 #include <algorithm> 4 #include <cstring> 5 #include <sstream> 6 #include <stack> 7 8 #include "android-base/logging.h" 9 #include "strings/case.h" 10 #include "strings/numbers.h" 11 #include "xmpmeta/base64.h" 12 #include "xmpmeta/jpeg_io.h" 13 #include "xmpmeta/xml/const.h" 14 #include "xmpmeta/xml/deserializer_impl.h" 15 #include "xmpmeta/xml/search.h" 16 #include "xmpmeta/xml/utils.h" 17 #include "xmpmeta/xmp_const.h" 18 19 using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch; 20 using ::dynamic_depth::xmpmeta::xml::DeserializerImpl; 21 using ::dynamic_depth::xmpmeta::xml::FromXmlChar; 22 using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement; 23 24 namespace dynamic_depth { 25 namespace xmpmeta { 26 namespace { 27 28 const char kJpgExtension[] = "jpg"; 29 const char kJpegExtension[] = "jpeg"; 30 31 bool BoolStringToBool(const string& bool_str, bool* value) { 32 if (dynamic_depth::StringCaseEqual(bool_str, "true")) { 33 *value = true; 34 return true; 35 } 36 if (dynamic_depth::StringCaseEqual(bool_str, "false")) { 37 *value = false; 38 return true; 39 } 40 return false; 41 } 42 43 // Converts string_property to the type T. 44 template <typename T> 45 bool ConvertStringPropertyToType(const string& string_property, T* value); 46 47 // Gets the end of the XMP meta content. If there is no packet wrapper, returns 48 // data.length, otherwise returns 1 + the position of last '>' without '?' 49 // before it. Usually the packet wrapper end is "<?xpacket end="w"?>. 50 size_t GetXmpContentEnd(const string& data) { 51 if (data.empty()) { 52 return 0; 53 } 54 for (size_t i = data.size() - 1; i >= 1; --i) { 55 if (data[i] == '>') { 56 if (data[i - 1] != '?') { 57 return i + 1; 58 } 59 } 60 } 61 // It should not reach here for a valid XMP meta. 62 LOG(WARNING) << "Failed to find the end of the XMP meta content."; 63 return data.size(); 64 } 65 66 // True if 's' starts with substring 'x'. 67 bool StartsWith(const string& s, const string& x) { 68 return s.size() >= x.size() && !s.compare(0, x.size(), x); 69 } 70 // True if 's' ends with substring 'x'. 71 bool EndsWith(const string& s, const string& x) { 72 return s.size() >= x.size() && !s.compare(s.size() - x.size(), x.size(), x); 73 } 74 75 // Parses the first valid XMP section. Any other valid XMP section will be 76 // ignored. 77 bool ParseFirstValidXMPSection(const std::vector<Section>& sections, 78 XmpData* xmp) { 79 for (const Section& section : sections) { 80 if (StartsWith(section.data, XmpConst::Header())) { 81 const size_t end = GetXmpContentEnd(section.data); 82 // Increment header length by 1 for the null termination. 83 const size_t header_length = strlen(XmpConst::Header()) + 1; 84 // Check for integer underflow before subtracting. 85 if (header_length >= end) { 86 LOG(ERROR) << "Invalid content length: " 87 << static_cast<int>(end - header_length); 88 return false; 89 } 90 const size_t content_length = end - header_length; 91 // header_length is guaranteed to be <= data.size due to the if condition 92 // above. If this contract changes we must add an additonal check. 93 const char* content_start = §ion.data[header_length]; 94 // xmlReadMemory requires an int. Before casting size_t to int we must 95 // check for integer overflow. 96 if (content_length > INT_MAX) { 97 LOG(ERROR) << "First XMP section too large, size: " << content_length; 98 return false; 99 } 100 *xmp->MutableStandardSection() = xmlReadMemory( 101 content_start, static_cast<int>(content_length), nullptr, nullptr, 0); 102 if (xmp->StandardSection() == nullptr) { 103 LOG(WARNING) << "Failed to parse standard section."; 104 return false; 105 } 106 return true; 107 } 108 } 109 return false; 110 } 111 112 // Collects the extended XMP sections with the given name into a string. Other 113 // sections will be ignored. 114 string GetExtendedXmpSections(const std::vector<Section>& sections, 115 const string& section_name) { 116 string extended_header = XmpConst::ExtensionHeader(); 117 extended_header += '\0' + section_name; 118 // section_name is dynamically extracted from the xml file and can have an 119 // arbitrary size. Check for integer overflow before addition. 120 if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) { 121 return ""; 122 } 123 const size_t section_start_offset = 124 extended_header.size() + XmpConst::ExtensionHeaderOffset(); 125 126 // Compute the size of the buffer to parse the extended sections. 127 std::vector<const Section*> xmp_sections; 128 std::vector<size_t> xmp_end_offsets; 129 size_t buffer_size = 0; 130 for (const Section& section : sections) { 131 if (extended_header.empty() || StartsWith(section.data, extended_header)) { 132 const size_t end_offset = section.data.size(); 133 const size_t section_size = end_offset - section_start_offset; 134 if (end_offset < section_start_offset || 135 section_size > SIZE_MAX - buffer_size) { 136 return ""; 137 } 138 buffer_size += section_size; 139 xmp_sections.push_back(§ion); 140 xmp_end_offsets.push_back(end_offset); 141 } 142 } 143 144 // Copy all the relevant sections' data into a buffer. 145 string buffer(buffer_size, '\0'); 146 if (buffer.size() != buffer_size) { 147 return ""; 148 } 149 size_t offset = 0; 150 for (int i = 0; i < xmp_sections.size(); ++i) { 151 const Section* section = xmp_sections[i]; 152 const size_t length = xmp_end_offsets[i] - section_start_offset; 153 std::copy_n(§ion->data[section_start_offset], length, &buffer[offset]); 154 offset += length; 155 } 156 return buffer; 157 } 158 159 // Parses the extended XMP sections with the given name. All other sections 160 // will be ignored. 161 bool ParseExtendedXmpSections(const std::vector<Section>& sections, 162 const string& section_name, XmpData* xmp_data) { 163 const string extended_sections = 164 GetExtendedXmpSections(sections, section_name); 165 // xmlReadMemory requires an int. Before casting size_t to int we must check 166 // for integer overflow. 167 if (extended_sections.size() > INT_MAX) { 168 LOG(WARNING) << "Extended sections too large, size: " 169 << extended_sections.size(); 170 return false; 171 } 172 *xmp_data->MutableExtendedSection() = xmlReadMemory( 173 extended_sections.data(), static_cast<int>(extended_sections.size()), 174 nullptr, nullptr, XML_PARSE_HUGE); 175 if (xmp_data->ExtendedSection() == nullptr) { 176 LOG(WARNING) << "Failed to parse extended sections."; 177 return false; 178 } 179 return true; 180 } 181 182 // Extracts a XmpData from a JPEG image stream. 183 bool ExtractXmpMeta(const bool skip_extended, std::istream* file, 184 XmpData* xmp_data) { 185 // We cannot use CHECK because this is ported to AOSP. 186 assert(xmp_data != nullptr); // NOLINT 187 xmp_data->Reset(); 188 189 ParseOptions parse_options; 190 parse_options.read_meta_only = true; 191 if (skip_extended) { 192 parse_options.section_header = XmpConst::Header(); 193 parse_options.section_header_return_first = true; 194 } 195 const std::vector<Section> sections = Parse(parse_options, file); 196 if (sections.empty()) { 197 LOG(WARNING) << "No sections found."; 198 return false; 199 } 200 201 if (!ParseFirstValidXMPSection(sections, xmp_data)) { 202 LOG(WARNING) << "Could not parse first section."; 203 return false; 204 } 205 if (skip_extended) { 206 return true; 207 } 208 string extension_name; 209 DeserializerImpl deserializer( 210 GetFirstDescriptionElement(xmp_data->StandardSection())); 211 if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(), 212 XmpConst::HasExtension(), &extension_name)) { 213 // No extended sections present, so nothing to parse. 214 return true; 215 } 216 if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) { 217 LOG(WARNING) << "Extended sections present, but could not be parsed."; 218 return false; 219 } 220 return true; 221 } 222 223 // Extracts the specified string attribute. 224 bool GetStringProperty(const xmlNodePtr node, const char* prefix, 225 const char* property, string* value) { 226 const xmlDocPtr doc = node->doc; 227 for (const _xmlAttr* attribute = node->properties; attribute != nullptr; 228 attribute = attribute->next) { 229 if (attribute->ns && 230 strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 && 231 strcmp(FromXmlChar(attribute->name), property) == 0) { 232 xmlChar* attribute_string = 233 xmlNodeListGetString(doc, attribute->children, 1); 234 *value = FromXmlChar(attribute_string); 235 xmlFree(attribute_string); 236 return true; 237 } 238 } 239 return false; 240 } 241 242 // Reads the contents of a node. 243 // E.g. <prefix:node_name>Contents Here</prefix:node_name> 244 bool ReadNodeContent(const xmlNodePtr node, const char* prefix, 245 const char* node_name, string* value) { 246 auto* element = DepthFirstSearch(node, node_name); 247 if (element == nullptr) { 248 return false; 249 } 250 if (prefix != nullptr && 251 (element->ns == nullptr || element->ns->prefix == nullptr || 252 strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) { 253 return false; 254 } 255 xmlChar* node_content = xmlNodeGetContent(element); 256 *value = FromXmlChar(node_content); 257 free(node_content); 258 return true; 259 } 260 261 template <typename T> 262 bool ConvertStringPropertyToType(const string& string_property, T* value) { 263 QCHECK(value) << "Cannot call this method on a generic type"; 264 return false; 265 } 266 267 template <> 268 bool ConvertStringPropertyToType<bool>(const string& string_property, 269 bool* value) { 270 return BoolStringToBool(string_property, value); 271 } 272 273 template <> 274 bool ConvertStringPropertyToType<double>(const string& string_property, 275 double* value) { 276 *value = std::stod(string_property); 277 return true; 278 } 279 280 template <> 281 bool ConvertStringPropertyToType<int>(const string& string_property, 282 int* value) { 283 *value = 0; 284 for (int i = 0; i < string_property.size(); ++i) { 285 if (!isdigit(string_property[i])) { 286 return false; 287 } 288 } 289 290 *value = std::atoi(string_property.c_str()); // NOLINT 291 return true; 292 } 293 294 template <> 295 bool ConvertStringPropertyToType<int64>(const string& string_property, 296 int64* value) { 297 *value = std::stol(string_property); 298 return true; 299 } 300 301 } // namespace 302 303 bool ReadXmpHeader(const string& filename, const bool skip_extended, 304 XmpData* xmp_data) { 305 string filename_lower = filename; 306 std::transform(filename_lower.begin(), filename_lower.end(), 307 filename_lower.begin(), ::tolower); 308 if (!EndsWith(filename_lower, kJpgExtension) && 309 !EndsWith(filename_lower, kJpegExtension)) { 310 LOG(WARNING) << "XMP parse: only JPEG file is supported"; 311 return false; 312 } 313 314 std::ifstream file(filename.c_str(), std::ios::binary); 315 if (!file.is_open()) { 316 LOG(WARNING) << " Could not read file: " << filename; 317 return false; 318 } 319 return ExtractXmpMeta(skip_extended, &file, xmp_data); 320 } 321 322 bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended, 323 XmpData* xmp_data) { 324 std::istringstream stream(jpeg_contents); 325 return ExtractXmpMeta(skip_extended, &stream, xmp_data); 326 } 327 328 bool ReadXmpHeader(std::istream* input_stream, bool skip_extended, 329 XmpData* xmp_data) { 330 return ExtractXmpMeta(skip_extended, input_stream, xmp_data); 331 } 332 333 } // namespace xmpmeta 334 } // namespace dynamic_depth 335