// =================================================================================================
// ADOBE SYSTEMS INCORPORATED
// Copyright 2006 Adobe Systems Incorporated
// All Rights Reserved
//
// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
// of the Adobe license agreement accompanying it.
// =================================================================================================

package com.adobe.xmp.impl;

import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPDateTime;
import com.adobe.xmp.XMPError;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.XMPUtils;
import com.adobe.xmp.impl.xpath.XMPPath;
import com.adobe.xmp.impl.xpath.XMPPathParser;
import com.adobe.xmp.options.ParseOptions;
import com.adobe.xmp.options.PropertyOptions;
import com.adobe.xmp.properties.XMPAliasInfo;

/**
 * Static helper that post-processes a freshly parsed metadata tree: it applies
 * schema specific fixes, resolves explicit aliases, moves an old style instance
 * ID out of the root name and finally drops schemas that ended up empty.
 *
 * @since Aug 18, 2006
 */
public class XMPNormalizer
{
	/** caches the correct dc-property array forms (property name -> PropertyOptions) */
	private static Map dcArrayForms;
	/** fill the cache of dc array forms once when the class is loaded */
	static
	{
		initDCArrays();
	}


	/**
	 * Hidden constructor, the class only offers static methods.
	 */
	private XMPNormalizer()
	{
		// EMPTY
	}


	/**
	 * Normalizes a raw parsed XMPMeta-Object.
	 * The steps run in a fixed order: schema touch-ups, alias resolution,
	 * the old-XMP instance ID tweak and, last, removal of empty schemas.
	 *
	 * @param xmp the raw metadata object
	 * @param options the parsing options
	 * @return Returns the normalized metadata object (the same instance that was passed in)
	 * @throws XMPException Collects all severe processing errors.
	 */
	static XMPMeta process(XMPMetaImpl xmp, ParseOptions options) throws XMPException
	{
		XMPNode tree = xmp.getRoot();

		touchUpDataModel(xmp);
		moveExplicitAliases(tree, options);

		tweakOldXMP(tree);

		// must run last, the steps above can leave schema nodes without children
		deleteEmptySchemas(tree);

		return xmp;
	}


	/**
	 * Tweak old XMP: Move an instance ID from rdf:about to the
	 * <em>xmpMM:InstanceID</em> property. An old instance ID usually looks
	 * like "uuid:bac965c4-9d87-11d9-9a30-000d936b79c4", plus InDesign
	 * 3.0 wrote them like "bac965c4-9d87-11d9-9a30-000d936b79c4". If
	 * the name looks like a UUID simply move it to <em>xmpMM:InstanceID</em>,
	 * don't worry about any existing <em>xmpMM:InstanceID</em>. Both will
	 * only be present when a newer file with the <em>xmpMM:InstanceID</em>
	 * property is updated by an old app that uses <em>rdf:about</em>.
	 *
	 * @param tree the root of the metadata tree
	 * @throws XMPException Thrown if tweaking fails.
	 */
	private static void tweakOldXMP(XMPNode tree) throws XMPException
	{
		String rootName = tree.getName();
		if (rootName == null || rootName.length() < Utils.UUID_LENGTH)
		{
			// too short to be a UUID, nothing to move
			return;
		}

		// Lower-case with a fixed locale: with the default locale a Turkish
		// environment maps "UUID:" to "uu\u0131d:" and the prefix test below fails.
		String nameStr = rootName.toLowerCase(Locale.ENGLISH);
		if (nameStr.startsWith("uuid:"))
		{
			nameStr = nameStr.substring(5);
		}

		if (!Utils.checkUUIDFormat(nameStr))
		{
			return;
		}

		// move UUID to xmpMM:InstanceID and remove it from the root node
		XMPPath path = XMPPathParser.expandXPath(XMPConst.NS_XMP_MM, "InstanceID");
		XMPNode idNode = XMPNodeUtils.findNode(tree, path, true, null);
		if (idNode == null)
		{
			throw new XMPException("Failure creating xmpMM:InstanceID",
				XMPError.INTERNALFAILURE);
		}

		idNode.setOptions(null); // Clobber any existing xmpMM:InstanceID.
		idNode.setValue("uuid:" + nameStr);
		idNode.removeChildren();
		idNode.removeQualifiers();
		tree.setName(null);
	}


	/**
	 * Visit all schemas to do general fixes and handle special cases:
	 * the DC arrays, exif:GPSTimeStamp and exif:UserComment,
	 * xmpDM:copyright and xmpRights:UsageTerms.
	 *
	 * @param xmp the metadata object implementation
	 * @throws XMPException Thrown if the normalisation fails.
	 */
	private static void touchUpDataModel(XMPMetaImpl xmp) throws XMPException
	{
		// make sure the DC schema is existing, because it might be needed within the normalization
		// if not touched it will be removed by deleteEmptySchemas
		XMPNodeUtils.findSchemaNode(xmp.getRoot(), XMPConst.NS_DC, true);

		// Do the special case fixes within each schema.
		for (Iterator it = xmp.getRoot().iterateChildren(); it.hasNext();)
		{
			XMPNode currSchema = (XMPNode) it.next();
			String schemaName = currSchema.getName();

			if (XMPConst.NS_DC.equals(schemaName))
			{
				normalizeDCArrays(currSchema);
			}
			else if (XMPConst.NS_EXIF.equals(schemaName))
			{
				// Do a special case fix for exif:GPSTimeStamp.
				fixGPSTimeStamp(currSchema);
				XMPNode arrayNode = XMPNodeUtils.findChildNode(currSchema, "exif:UserComment",
						false);
				if (arrayNode != null)
				{
					repairAltText(arrayNode);
				}
			}
			else if (XMPConst.NS_DM.equals(schemaName))
			{
				// Do a special case migration of xmpDM:copyright to
				// dc:rights['x-default'].
				XMPNode dmCopyright = XMPNodeUtils.findChildNode(currSchema, "xmpDM:copyright",
						false);
				if (dmCopyright != null)
				{
					migrateAudioCopyright(xmp, dmCopyright);
				}
			}
			else if (XMPConst.NS_XMP_RIGHTS.equals(schemaName))
			{
				XMPNode arrayNode = XMPNodeUtils.findChildNode(currSchema, "xmpRights:UsageTerms",
						false);
				if (arrayNode != null)
				{
					repairAltText(arrayNode);
				}
			}
		}
	}


	/**
	 * Undo the denormalization performed by the XMP used in Acrobat 5.<br>
	 * If a Dublin Core array had only one item, it was serialized as a simple
	 * property. <br>
	 * The <code>xml:lang</code> attribute was dropped from an
	 * <code>alt-text</code> item if the language was <code>x-default</code>.
	 * <p>
	 * Properties that are not listed in the dc array form cache are left untouched.
	 *
	 * @param dcSchema the DC schema node
	 * @throws XMPException Thrown if normalization fails
	 */
	private static void normalizeDCArrays(XMPNode dcSchema) throws XMPException
	{
		// child indices are 1-based
		for (int i = 1; i <= dcSchema.getChildrenLength(); i++)
		{
			XMPNode currProp = dcSchema.getChild(i);

			PropertyOptions arrayForm = (PropertyOptions) dcArrayForms.get(currProp.getName());
			if (arrayForm == null)
			{
				// not one of the known dc arrays
				continue;
			}

			if (currProp.getOptions().isSimple())
			{
				// create a new array and add the current property as child,
				// if it was formerly simple
				// NOTE(review): hand the node its own copy of the options; if XMPNode keeps
				// the passed reference, later changes to the node's options would otherwise
				// leak into the shared static cache.
				PropertyOptions newArrayForm = new PropertyOptions();
				newArrayForm.mergeWith(arrayForm);

				XMPNode newArray = new XMPNode(currProp.getName(), newArrayForm);
				currProp.setName(XMPConst.ARRAY_ITEM_NAME);
				newArray.addChild(currProp);
				dcSchema.replaceChild(i, newArray);

				// fix language alternatives
				if (arrayForm.isArrayAltText() && !currProp.getOptions().getHasLanguage())
				{
					XMPNode newLang = new XMPNode(XMPConst.XML_LANG, XMPConst.X_DEFAULT, null);
					currProp.addQualifier(newLang);
				}
			}
			else
			{
				// clear array options and add corrected array form if it has been an array before
				currProp.getOptions().setOption(
					PropertyOptions.ARRAY |
					PropertyOptions.ARRAY_ORDERED |
					PropertyOptions.ARRAY_ALTERNATE |
					PropertyOptions.ARRAY_ALT_TEXT,
					false);
				currProp.getOptions().mergeWith(arrayForm);

				if (arrayForm.isArrayAltText())
				{
					// applying for "dc:description", "dc:rights", "dc:title"
					repairAltText(currProp);
				}
			}
		}
	}


	/**
	 * Make sure that the array is well-formed AltText. Each item must be simple
	 * and have an "xml:lang" qualifier. If repairs are needed, keep simple
	 * non-empty items by adding the "xml:lang" with value "x-repair".
	 * Composite items and empty items without a language are deleted.
	 *
	 * @param arrayNode the property node of the array to repair.
	 * @throws XMPException Forwards unexpected exceptions.
	 */
	private static void repairAltText(XMPNode arrayNode) throws XMPException
	{
		if (arrayNode == null ||
			!arrayNode.getOptions().isArray())
		{
			// Already OK or not even an array.
			return;
		}

		// fix options
		arrayNode.getOptions().setArrayOrdered(true).setArrayAlternate(true).setArrayAltText(true);

		for (Iterator it = arrayNode.iterateChildren(); it.hasNext();)
		{
			XMPNode currChild = (XMPNode) it.next();
			if (currChild.getOptions().isCompositeProperty())
			{
				// Delete non-simple children.
				it.remove();
			}
			else if (!currChild.getOptions().getHasLanguage())
			{
				String childValue = currChild.getValue();
				if (childValue == null || childValue.length() == 0)
				{
					// Delete empty valued children that have no xml:lang.
					it.remove();
				}
				else
				{
					// Add an xml:lang qualifier with the value "x-repair".
					XMPNode repairLang = new XMPNode(XMPConst.XML_LANG, "x-repair", null);
					currChild.addQualifier(repairLang);
				}
			}
		}
	}


	/**
	 * Visit all of the top level nodes looking for aliases. If there is
	 * no base, transplant the alias subtree. If there is a base and strict
	 * aliasing is on, make sure the alias and base subtrees match.
	 *
	 * @param tree the root of the metadata tree
	 * @param options the parsing options
	 * @throws XMPException Forwards XMP errors
	 */
	private static void moveExplicitAliases(XMPNode tree, ParseOptions options)
			throws XMPException
	{
		if (!tree.getHasAliases())
		{
			return;
		}
		tree.setHasAliases(false);

		boolean strictAliasing = options.getStrictAliasing();

		// base schemas may be created on the root while looping, hence the
		// iteration over the unmodifiable children list
		for (Iterator schemaIt = tree.getUnmodifiableChildren().iterator(); schemaIt.hasNext();)
		{
			XMPNode currSchema = (XMPNode) schemaIt.next();
			if (!currSchema.getHasAliases())
			{
				continue;
			}

			for (Iterator propertyIt = currSchema.iterateChildren(); propertyIt.hasNext();)
			{
				XMPNode currProp = (XMPNode) propertyIt.next();

				if (!currProp.isAlias())
				{
					continue;
				}

				currProp.setAlias(false);

				// Find the base path, look for the base schema and root node.
				XMPAliasInfo info = XMPMetaFactory.getSchemaRegistry()
						.findAlias(currProp.getName());
				if (info == null)
				{
					// not a registered alias, leave the property where it is
					continue;
				}

				// find or create schema
				XMPNode baseSchema = XMPNodeUtils.findSchemaNode(tree, info
						.getNamespace(), null, true);
				baseSchema.setImplicit(false);

				String baseName = info.getPrefix() + info.getPropName();
				XMPNode baseNode = XMPNodeUtils.findChildNode(baseSchema, baseName, false);
				if (baseNode == null)
				{
					if (info.getAliasForm().isSimple())
					{
						// A top-to-top alias, transplant the property.
						// change the alias property name to the base name
						currProp.setName(baseName);
						baseSchema.addChild(currProp);
						// remove the alias property
						propertyIt.remove();
					}
					else
					{
						// An alias to an array item,
						// create the array and transplant the property.
						baseNode = new XMPNode(baseName, info
								.getAliasForm().toPropertyOptions());
						baseSchema.addChild(baseNode);
						transplantArrayItemAlias(propertyIt, currProp, baseNode);
					}
				}
				else if (info.getAliasForm().isSimple())
				{
					// The base node does exist and this is a top-to-top alias.
					// Check for conflicts if strict aliasing is on.
					// Remove and delete the alias subtree.
					if (strictAliasing)
					{
						compareAliasedSubtrees(currProp, baseNode, true);
					}

					propertyIt.remove();
				}
				else
				{
					// This is an alias to an array item and the array exists.
					// Look for the aliased item.
					// Then transplant or check & delete as appropriate.

					XMPNode itemNode = null;
					if (info.getAliasForm().isArrayAltText())
					{
						int xdIndex = XMPNodeUtils.lookupLanguageItem(baseNode,
								XMPConst.X_DEFAULT);
						if (xdIndex != -1)
						{
							itemNode = baseNode.getChild(xdIndex);
						}
					}
					else if (baseNode.hasChildren())
					{
						itemNode = baseNode.getChild(1);
					}

					if (itemNode == null)
					{
						transplantArrayItemAlias(propertyIt, currProp, baseNode);
					}
					else
					{
						if (strictAliasing)
						{
							compareAliasedSubtrees(currProp, itemNode, true);
						}

						propertyIt.remove();
					}
				}
			}
			currSchema.setHasAliases(false);
		}
	}


	/**
	 * Moves an alias node of array form to another schema into an array.
	 * For an alt-text base array the node gets an "xml:lang" qualifier
	 * with the value "x-default" before it is moved.
	 *
	 * @param propertyIt the property iterator of the old schema (used to delete the property)
	 * @param childNode the node to be moved
	 * @param baseArray the base array for the array item
	 * @throws XMPException Forwards XMP errors, thrown with BADXMP if the node
	 *         already carries a language qualifier although the base is an alt-text array
	 */
	private static void transplantArrayItemAlias(Iterator propertyIt, XMPNode childNode,
			XMPNode baseArray) throws XMPException
	{
		if (baseArray.getOptions().isArrayAltText())
		{
			if (childNode.getOptions().getHasLanguage())
			{
				throw new XMPException("Alias to x-default already has a language qualifier",
						XMPError.BADXMP);
			}

			XMPNode langQual = new XMPNode(XMPConst.XML_LANG, XMPConst.X_DEFAULT, null);
			childNode.addQualifier(langQual);
		}

		// detach from the old schema first, then rename and attach to the array
		propertyIt.remove();
		childNode.setName(XMPConst.ARRAY_ITEM_NAME);
		baseArray.addChild(childNode);
	}


	/**
	 * Fixes the GPS Timestamp in EXIF. If exif:GPSTimeStamp has no date part
	 * (year, month and day are all 0), the date is taken over from
	 * exif:DateTimeOriginal or, if that is missing, from exif:DateTimeDigitized.
	 * When none of them is available or a value cannot be converted,
	 * the timestamp is left as it is.
	 *
	 * @param exifSchema the EXIF schema node
	 * @throws XMPException Thrown if the date conversion fails.
	 */
	private static void fixGPSTimeStamp(XMPNode exifSchema)
			throws XMPException
	{
		// Note: if dates are not found the convert-methods throws an exceptions,
		// 		 and this methods returns.
		XMPNode gpsDateTime = XMPNodeUtils.findChildNode(exifSchema, "exif:GPSTimeStamp", false);
		if (gpsDateTime == null)
		{
			return;
		}

		try
		{
			XMPDateTime binGPSStamp;
			XMPDateTime binOtherDate;

			binGPSStamp = XMPUtils.convertToDate(gpsDateTime.getValue());
			if (binGPSStamp.getYear() != 0 ||
				binGPSStamp.getMonth() != 0 ||
				binGPSStamp.getDay() != 0)
			{
				// the timestamp already has a date part
				return;
			}

			XMPNode otherDate = XMPNodeUtils.findChildNode(exifSchema, "exif:DateTimeOriginal",
					false);
			if (otherDate == null)
			{
				otherDate = XMPNodeUtils.findChildNode(exifSchema, "exif:DateTimeDigitized", false);
			}
			if (otherDate == null)
			{
				// no date to borrow from; without this check the getValue() call below
				// fails with a NullPointerException that the catch clause does not cover
				return;
			}

			binOtherDate = XMPUtils.convertToDate(otherDate.getValue());
			Calendar cal = binGPSStamp.getCalendar();
			cal.set(Calendar.YEAR, binOtherDate.getYear());
			// Calendar.MONTH is zero-based while the XMPDateTime month is documented
			// as 1..12; an unset month (0) keeps mapping to 0 as before.
			cal.set(Calendar.MONTH, Math.max(binOtherDate.getMonth() - 1, 0));
			cal.set(Calendar.DAY_OF_MONTH, binOtherDate.getDay());
			binGPSStamp = new XMPDateTimeImpl(cal);
			gpsDateTime.setValue(XMPUtils.convertFromDate(binGPSStamp));
		}
		catch (XMPException e)
		{
			// Don't let a missing or bad date stop other things.
			return;
		}
	}


	/**
	 * Remove all empty schemas from the metadata tree that were generated during the rdf parsing.
	 *
	 * @param tree the root of the metadata tree
	 */
	private static void deleteEmptySchemas(XMPNode tree)
	{
		// Delete empty schema nodes. Do this last, other cleanup can make empty
		// schema.

		for (Iterator it = tree.iterateChildren(); it.hasNext();)
		{
			XMPNode schema = (XMPNode) it.next();
			if (!schema.hasChildren())
			{
				it.remove();
			}
		}
	}


	/**
	 * Recursively checks that an alias subtree matches its base subtree.
	 * The outermost call is special. The names almost certainly differ. The
	 * qualifiers (and hence options) will differ for an alias to the x-default
	 * item of a langAlt array. Therefore name, options and qualifier count are
	 * only compared on the nested calls; value and child count are always compared.
	 *
	 * @param aliasNode the alias node
	 * @param baseNode the base node of the alias
	 * @param outerCall marks the outer call of the recursion
	 * @throws XMPException Forwards XMP errors, thrown with BADXMP on any mismatch
	 */
	private static void compareAliasedSubtrees(XMPNode aliasNode, XMPNode baseNode,
			boolean outerCall) throws XMPException
	{
		// null-safe: a node without a value must not abort the parse with a NullPointerException
		if (!sameValue(aliasNode.getValue(), baseNode.getValue()) ||
			aliasNode.getChildrenLength() != baseNode.getChildrenLength())
		{
			throw new XMPException("Mismatch between alias and base nodes", XMPError.BADXMP);
		}

		if (
				!outerCall  &&
				(!aliasNode.getName().equals(baseNode.getName())  ||
				 !aliasNode.getOptions().equals(baseNode.getOptions())  ||
				 aliasNode.getQualifierLength() != baseNode.getQualifierLength())
		   )
		{
			throw new XMPException("Mismatch between alias and base nodes",
				XMPError.BADXMP);
		}

		// compare the children pairwise, the counts are known to be equal here
		for (Iterator an = aliasNode.iterateChildren(),
					  bn = baseNode.iterateChildren();
			 an.hasNext() && bn.hasNext();)
		{
			XMPNode aliasChild = (XMPNode) an.next();
			XMPNode baseChild =  (XMPNode) bn.next();
			compareAliasedSubtrees(aliasChild, baseChild, false);
		}

		// compare the qualifiers pairwise, stops at the shorter list
		for (Iterator an = aliasNode.iterateQualifier(),
					  bn = baseNode.iterateQualifier();
			 an.hasNext() && bn.hasNext();)
		{
			XMPNode aliasQual = (XMPNode) an.next();
			XMPNode baseQual =  (XMPNode) bn.next();
			compareAliasedSubtrees(aliasQual, baseQual, false);
		}
	}


	/**
	 * Null-safe comparison of two node values.
	 *
	 * @param first a node value, can be <code>null</code>
	 * @param second another node value, can be <code>null</code>
	 * @return Returns true if both values are <code>null</code> or both are equal strings.
	 */
	private static boolean sameValue(String first, String second)
	{
		return first == null ? second == null : first.equals(second);
	}


	/**
	 * The initial support for WAV files mapped a legacy ID3 audio copyright
	 * into a new xmpDM:copyright property. This is special case code to migrate
	 * that into dc:rights['x-default']. The rules:
	 *
	 * <pre>
	 * 1. If there is no dc:rights array, or an empty array -
	 *    Create one with dc:rights['x-default'] set from double linefeed and xmpDM:copyright.
	 *
	 * 2. If there is a dc:rights array but it has no x-default item -
	 *    Create an x-default item as a copy of the first item then apply rule #3.
	 *
	 * 3. If there is a dc:rights array with an x-default item,
	 *    Look for a double linefeed in the value.
	 *     A. If no double linefeed, compare the x-default value to the xmpDM:copyright value.
	 *         A1. If they match then leave the x-default value alone.
	 *         A2. Otherwise, append a double linefeed and
	 *             the xmpDM:copyright value to the x-default value.
	 *     B. If there is a double linefeed, compare the trailing text to the xmpDM:copyright value.
	 *         B1. If they match then leave the x-default value alone.
	 *         B2. Otherwise, replace the trailing x-default text with the xmpDM:copyright value.
	 *
	 * 4. In all cases, delete the xmpDM:copyright property.
	 * </pre>
	 *
	 * An XMPException raised on the way is swallowed on purpose; in that case the
	 * xmpDM:copyright property is not deleted.
	 *
	 * @param xmp the metadata object
	 * @param dmCopyright the "dm:copyright"-property
	 */
	private static void migrateAudioCopyright(XMPMeta xmp, XMPNode dmCopyright)
	{
		try
		{
			XMPNode dcSchema = XMPNodeUtils.findSchemaNode(
				((XMPMetaImpl) xmp).getRoot(), XMPConst.NS_DC, true);

			String dmValue = dmCopyright.getValue();
			String doubleLF = "\n\n";

			XMPNode dcRightsArray = XMPNodeUtils.findChildNode(dcSchema, "dc:rights", false);

			if (dcRightsArray == null || !dcRightsArray.hasChildren())
			{
				// 1. No dc:rights array, create from double linefeed and xmpDM:copyright.
				dmValue = doubleLF + dmValue;
				xmp.setLocalizedText(XMPConst.NS_DC, "rights", "", XMPConst.X_DEFAULT, dmValue,
						null);
			}
			else
			{
				int xdIndex = XMPNodeUtils.lookupLanguageItem(dcRightsArray, XMPConst.X_DEFAULT);

				if (xdIndex < 0)
				{
					// 2. No x-default item, create from the first item.
					String firstValue = dcRightsArray.getChild(1).getValue();
					xmp.setLocalizedText(XMPConst.NS_DC, "rights", "", XMPConst.X_DEFAULT,
						firstValue, null);
					xdIndex = XMPNodeUtils.lookupLanguageItem(dcRightsArray, XMPConst.X_DEFAULT);
				}

				// 3. Look for a double linefeed in the x-default value.
				XMPNode defaultNode = dcRightsArray.getChild(xdIndex);
				String defaultValue = defaultNode.getValue();
				int lfPos = defaultValue.indexOf(doubleLF);

				if (lfPos < 0)
				{
					// 3A. No double LF, compare whole values.
					if (!dmValue.equals(defaultValue))
					{
						// 3A2. Append the xmpDM:copyright to the x-default
						// item.
						defaultNode.setValue(defaultValue + doubleLF + dmValue);
					}
				}
				else
				{
					// 3B. Has double LF, compare the tail.
					if (!defaultValue.substring(lfPos + 2).equals(dmValue))
					{
						// 3B2. Replace the x-default tail.
						defaultNode.setValue(defaultValue.substring(0, lfPos + 2) + dmValue);
					}
				}
			}

			// 4. Get rid of the xmpDM:copyright.
			dmCopyright.getParent().removeChild(dmCopyright);
		}
		catch (XMPException e)
		{
			// Don't let failures (like a bad dc:rights form) stop other
			// cleanup.
		}
	}


	/**
	 * Initializes the map that contains the known arrays, that are fixed by
	 * {@link XMPNormalizer#normalizeDCArrays(XMPNode)}.
	 * The map is keyed by the qualified dc property name; properties of the
	 * same array kind share one PropertyOptions instance.
	 */
	private static void initDCArrays()
	{
		dcArrayForms = new HashMap();

		// Properties supposed to be a "Bag".
		PropertyOptions bagForm = new PropertyOptions();
		bagForm.setArray(true);
		dcArrayForms.put("dc:contributor", bagForm);
		dcArrayForms.put("dc:language", bagForm);
		dcArrayForms.put("dc:publisher", bagForm);
		dcArrayForms.put("dc:relation", bagForm);
		dcArrayForms.put("dc:subject", bagForm);
		dcArrayForms.put("dc:type", bagForm);

		// Properties supposed to be a "Seq".
		PropertyOptions seqForm = new PropertyOptions();
		seqForm.setArray(true);
		seqForm.setArrayOrdered(true);
		dcArrayForms.put("dc:creator", seqForm);
		dcArrayForms.put("dc:date", seqForm);

		// Properties supposed to be an "Alt" in alternative-text form.
		PropertyOptions altTextForm = new PropertyOptions();
		altTextForm.setArray(true);
		altTextForm.setArrayOrdered(true);
		altTextForm.setArrayAlternate(true);
		altTextForm.setArrayAltText(true);
		dcArrayForms.put("dc:description", altTextForm);
		dcArrayForms.put("dc:rights", altTextForm);
		dcArrayForms.put("dc:title", altTextForm);
	}
}