1 // ================================================================================================= 2 // ADOBE SYSTEMS INCORPORATED 3 // Copyright 2006 Adobe Systems Incorporated 4 // All Rights Reserved 5 // 6 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms 7 // of the Adobe license agreement accompanying it. 8 // ================================================================================================= 9 10 package com.adobe.xmp.impl; 11 12 import java.util.List; 13 import java.util.ArrayList; 14 import java.util.Iterator; 15 16 import org.w3c.dom.Attr; 17 import org.w3c.dom.NamedNodeMap; 18 import org.w3c.dom.Node; 19 20 import com.adobe.xmp.XMPConst; 21 import com.adobe.xmp.XMPError; 22 import com.adobe.xmp.XMPException; 23 import com.adobe.xmp.XMPMetaFactory; 24 import com.adobe.xmp.XMPSchemaRegistry; 25 import com.adobe.xmp.options.PropertyOptions; 26 27 28 /** 29 * Parser for "normal" XML serialisation of RDF. 30 * 31 * @since 14.07.2006 32 */ 33 public class ParseRDF implements XMPError, XMPConst 34 { 35 /** */ 36 public static final int RDFTERM_OTHER = 0; 37 /** Start of coreSyntaxTerms. */ 38 public static final int RDFTERM_RDF = 1; 39 /** */ 40 public static final int RDFTERM_ID = 2; 41 /** */ 42 public static final int RDFTERM_ABOUT = 3; 43 /** */ 44 public static final int RDFTERM_PARSE_TYPE = 4; 45 /** */ 46 public static final int RDFTERM_RESOURCE = 5; 47 /** */ 48 public static final int RDFTERM_NODE_ID = 6; 49 /** End of coreSyntaxTerms */ 50 public static final int RDFTERM_DATATYPE = 7; 51 /** Start of additions for syntax Terms. */ 52 public static final int RDFTERM_DESCRIPTION = 8; 53 /** End of of additions for syntaxTerms. */ 54 public static final int RDFTERM_LI = 9; 55 /** Start of oldTerms. */ 56 public static final int RDFTERM_ABOUT_EACH = 10; 57 /** */ 58 public static final int RDFTERM_ABOUT_EACH_PREFIX = 11; 59 /** End of oldTerms. */ 60 public static final int RDFTERM_BAG_ID = 12; 61 /** */ 62 public static final int RDFTERM_FIRST_CORE = RDFTERM_RDF; 63 /** */ 64 public static final int RDFTERM_LAST_CORE = RDFTERM_DATATYPE; 65 /** ! Yes, the syntax terms include the core terms. */ 66 public static final int RDFTERM_FIRST_SYNTAX = RDFTERM_FIRST_CORE; 67 /** */ 68 public static final int RDFTERM_LAST_SYNTAX = RDFTERM_LI; 69 /** */ 70 public static final int RDFTERM_FIRST_OLD = RDFTERM_ABOUT_EACH; 71 /** */ 72 public static final int RDFTERM_LAST_OLD = RDFTERM_BAG_ID; 73 74 /** this prefix is used for default namespaces */ 75 public static final String DEFAULT_PREFIX = "_dflt"; 76 77 78 79 /** 80 * The main parsing method. The XML tree is walked through from the root node and and XMP tree 81 * is created. This is a raw parse, the normalisation of the XMP tree happens outside. 82 * 83 * @param xmlRoot the XML root node 84 * @return Returns an XMP metadata object (not normalized) 85 * @throws XMPException Occurs if the parsing fails for any reason. 86 */ 87 static XMPMetaImpl parse(Node xmlRoot) throws XMPException 88 { 89 XMPMetaImpl xmp = new XMPMetaImpl(); 90 rdf_RDF(xmp, xmlRoot); 91 return xmp; 92 } 93 94 95 /** 96 * Each of these parsing methods is responsible for recognizing an RDF 97 * syntax production and adding the appropriate structure to the XMP tree. 98 * They simply return for success, failures will throw an exception. 99 * 100 * @param xmp the xmp metadata object that is generated 101 * @param rdfRdfNode the top-level xml node 102 * @throws XMPException thown on parsing errors 103 */ 104 static void rdf_RDF(XMPMetaImpl xmp, Node rdfRdfNode) throws XMPException 105 { 106 if (rdfRdfNode.hasAttributes()) 107 { 108 rdf_NodeElementList (xmp, xmp.getRoot(), rdfRdfNode); 109 } 110 else 111 { 112 throw new XMPException("Invalid attributes of rdf:RDF element", BADRDF); 113 } 114 } 115 116 117 /** 118 * 7.2.10 nodeElementList<br> 119 * ws* ( nodeElement ws* )* 120 * 121 * Note: this method is only called from the rdf:RDF-node (top level) 122 * @param xmp the xmp metadata object that is generated 123 * @param xmpParent the parent xmp node 124 * @param rdfRdfNode the top-level xml node 125 * @throws XMPException thown on parsing errors 126 */ 127 private static void rdf_NodeElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node rdfRdfNode) 128 throws XMPException 129 { 130 for (int i = 0; i < rdfRdfNode.getChildNodes().getLength(); i++) 131 { 132 Node child = rdfRdfNode.getChildNodes().item(i); 133 // filter whitespaces (and all text nodes) 134 if (!isWhitespaceNode(child)) 135 { 136 rdf_NodeElement (xmp, xmpParent, child, true); 137 } 138 } 139 } 140 141 142 /** 143 * 7.2.5 nodeElementURIs 144 * anyURI - ( coreSyntaxTerms | rdf:li | oldTerms ) 145 * 146 * 7.2.11 nodeElement 147 * start-element ( URI == nodeElementURIs, 148 * attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) ) 149 * propertyEltList 150 * end-element() 151 * 152 * A node element URI is rdf:Description or anything else that is not an RDF 153 * term. 154 * 155 * @param xmp the xmp metadata object that is generated 156 * @param xmpParent the parent xmp node 157 * @param xmlNode the currently processed XML node 158 * @param isTopLevel Flag if the node is a top-level node 159 * @throws XMPException thown on parsing errors 160 */ 161 private static void rdf_NodeElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode, 162 boolean isTopLevel) throws XMPException 163 { 164 int nodeTerm = getRDFTermKind (xmlNode); 165 if (nodeTerm != RDFTERM_DESCRIPTION && nodeTerm != RDFTERM_OTHER) 166 { 167 throw new XMPException("Node element must be rdf:Description or typed node", 168 BADRDF); 169 } 170 else if (isTopLevel && nodeTerm == RDFTERM_OTHER) 171 { 172 throw new XMPException("Top level typed node not allowed", BADXMP); 173 } 174 else 175 { 176 rdf_NodeElementAttrs (xmp, xmpParent, xmlNode, isTopLevel); 177 rdf_PropertyElementList (xmp, xmpParent, xmlNode, isTopLevel); 178 } 179 180 } 181 182 183 /** 184 * 185 * 7.2.7 propertyAttributeURIs 186 * anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms ) 187 * 188 * 7.2.11 nodeElement 189 * start-element ( URI == nodeElementURIs, 190 * attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) ) 191 * propertyEltList 192 * end-element() 193 * 194 * Process the attribute list for an RDF node element. A property attribute URI is 195 * anything other than an RDF term. The rdf:ID and rdf:nodeID attributes are simply ignored, 196 * as are rdf:about attributes on inner nodes. 197 * 198 * @param xmp the xmp metadata object that is generated 199 * @param xmpParent the parent xmp node 200 * @param xmlNode the currently processed XML node 201 * @param isTopLevel Flag if the node is a top-level node 202 * @throws XMPException thown on parsing errors 203 */ 204 private static void rdf_NodeElementAttrs(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode, 205 boolean isTopLevel) throws XMPException 206 { 207 // Used to detect attributes that are mutually exclusive. 208 int exclusiveAttrs = 0; 209 210 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 211 { 212 Node attribute = xmlNode.getAttributes().item(i); 213 214 // quick hack, ns declarations do not appear in C++ 215 // ignore "ID" without namespace 216 if ("xmlns".equals(attribute.getPrefix()) || 217 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 218 { 219 continue; 220 } 221 222 int attrTerm = getRDFTermKind(attribute); 223 224 switch (attrTerm) 225 { 226 case RDFTERM_ID: 227 case RDFTERM_NODE_ID: 228 case RDFTERM_ABOUT: 229 if (exclusiveAttrs > 0) 230 { 231 throw new XMPException("Mutally exclusive about, ID, nodeID attributes", 232 BADRDF); 233 } 234 235 exclusiveAttrs++; 236 237 if (isTopLevel && (attrTerm == RDFTERM_ABOUT)) 238 { 239 // This is the rdf:about attribute on a top level node. Set 240 // the XMP tree name if 241 // it doesn't have a name yet. Make sure this name matches 242 // the XMP tree name. 243 if (xmpParent.getName() != null && xmpParent.getName().length() > 0) 244 { 245 if (!xmpParent.getName().equals(attribute.getNodeValue())) 246 { 247 throw new XMPException("Mismatched top level rdf:about values", 248 BADXMP); 249 } 250 } 251 else 252 { 253 xmpParent.setName(attribute.getNodeValue()); 254 } 255 } 256 break; 257 258 case RDFTERM_OTHER: 259 addChildNode(xmp, xmpParent, attribute, attribute.getNodeValue(), isTopLevel); 260 break; 261 262 default: 263 throw new XMPException("Invalid nodeElement attribute", BADRDF); 264 } 265 266 } 267 } 268 269 270 /** 271 * 7.2.13 propertyEltList 272 * ws* ( propertyElt ws* )* 273 * 274 * @param xmp the xmp metadata object that is generated 275 * @param xmpParent the parent xmp node 276 * @param xmlParent the currently processed XML node 277 * @param isTopLevel Flag if the node is a top-level node 278 * @throws XMPException thown on parsing errors 279 */ 280 private static void rdf_PropertyElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlParent, 281 boolean isTopLevel) throws XMPException 282 { 283 for (int i = 0; i < xmlParent.getChildNodes().getLength(); i++) 284 { 285 Node currChild = xmlParent.getChildNodes().item(i); 286 if (isWhitespaceNode(currChild)) 287 { 288 continue; 289 } 290 else if (currChild.getNodeType() != Node.ELEMENT_NODE) 291 { 292 throw new XMPException("Expected property element node not found", BADRDF); 293 } 294 else 295 { 296 rdf_PropertyElement(xmp, xmpParent, currChild, isTopLevel); 297 } 298 } 299 } 300 301 302 /** 303 * 7.2.14 propertyElt 304 * 305 * resourcePropertyElt | literalPropertyElt | parseTypeLiteralPropertyElt | 306 * parseTypeResourcePropertyElt | parseTypeCollectionPropertyElt | 307 * parseTypeOtherPropertyElt | emptyPropertyElt 308 * 309 * 7.2.15 resourcePropertyElt 310 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) ) 311 * ws* nodeElement ws* 312 * end-element() 313 * 314 * 7.2.16 literalPropertyElt 315 * start-element ( 316 * URI == propertyElementURIs, attributes == set ( idAttr?, datatypeAttr?) ) 317 * text() 318 * end-element() 319 * 320 * 7.2.17 parseTypeLiteralPropertyElt 321 * start-element ( 322 * URI == propertyElementURIs, attributes == set ( idAttr?, parseLiteral ) ) 323 * literal 324 * end-element() 325 * 326 * 7.2.18 parseTypeResourcePropertyElt 327 * start-element ( 328 * URI == propertyElementURIs, attributes == set ( idAttr?, parseResource ) ) 329 * propertyEltList 330 * end-element() 331 * 332 * 7.2.19 parseTypeCollectionPropertyElt 333 * start-element ( 334 * URI == propertyElementURIs, attributes == set ( idAttr?, parseCollection ) ) 335 * nodeElementList 336 * end-element() 337 * 338 * 7.2.20 parseTypeOtherPropertyElt 339 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) ) 340 * propertyEltList 341 * end-element() 342 * 343 * 7.2.21 emptyPropertyElt 344 * start-element ( URI == propertyElementURIs, 345 * attributes == set ( idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) ) 346 * end-element() 347 * 348 * The various property element forms are not distinguished by the XML element name, 349 * but by their attributes for the most part. The exceptions are resourcePropertyElt and 350 * literalPropertyElt. They are distinguished by their XML element content. 351 * 352 * NOTE: The RDF syntax does not explicitly include the xml:lang attribute although it can 353 * appear in many of these. We have to allow for it in the attibute counts below. 354 * 355 * @param xmp the xmp metadata object that is generated 356 * @param xmpParent the parent xmp node 357 * @param xmlNode the currently processed XML node 358 * @param isTopLevel Flag if the node is a top-level node 359 * @throws XMPException thown on parsing errors 360 */ 361 private static void rdf_PropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode, 362 boolean isTopLevel) throws XMPException 363 { 364 int nodeTerm = getRDFTermKind (xmlNode); 365 if (!isPropertyElementName(nodeTerm)) 366 { 367 throw new XMPException("Invalid property element name", BADRDF); 368 } 369 370 // remove the namespace-definitions from the list 371 NamedNodeMap attributes = xmlNode.getAttributes(); 372 List nsAttrs = null; 373 for (int i = 0; i < attributes.getLength(); i++) 374 { 375 Node attribute = attributes.item(i); 376 if ("xmlns".equals(attribute.getPrefix()) || 377 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 378 { 379 if (nsAttrs == null) 380 { 381 nsAttrs = new ArrayList(); 382 } 383 nsAttrs.add(attribute.getNodeName()); 384 } 385 } 386 if (nsAttrs != null) 387 { 388 for (Iterator it = nsAttrs.iterator(); it.hasNext();) 389 { 390 String ns = (String) it.next(); 391 attributes.removeNamedItem(ns); 392 } 393 } 394 395 396 if (attributes.getLength() > 3) 397 { 398 // Only an emptyPropertyElt can have more than 3 attributes. 399 rdf_EmptyPropertyElement(xmp, xmpParent, xmlNode, isTopLevel); 400 } 401 else 402 { 403 // Look through the attributes for one that isn't rdf:ID or xml:lang, 404 // it will usually tell what we should be dealing with. 405 // The called routines must verify their specific syntax! 406 407 for (int i = 0; i < attributes.getLength(); i++) 408 { 409 Node attribute = attributes.item(i); 410 String attrLocal = attribute.getLocalName(); 411 String attrNS = attribute.getNamespaceURI(); 412 String attrValue = attribute.getNodeValue(); 413 if (!(XML_LANG.equals(attribute.getNodeName()) && 414 !("ID".equals(attrLocal) && NS_RDF.equals(attrNS)))) 415 { 416 if ("datatype".equals(attrLocal) && NS_RDF.equals(attrNS)) 417 { 418 rdf_LiteralPropertyElement (xmp, xmpParent, xmlNode, isTopLevel); 419 } 420 else if (!("parseType".equals(attrLocal) && NS_RDF.equals(attrNS))) 421 { 422 rdf_EmptyPropertyElement (xmp, xmpParent, xmlNode, isTopLevel); 423 } 424 else if ("Literal".equals(attrValue)) 425 { 426 rdf_ParseTypeLiteralPropertyElement(); 427 } 428 else if ("Resource".equals(attrValue)) 429 { 430 rdf_ParseTypeResourcePropertyElement(xmp, xmpParent, xmlNode, isTopLevel); 431 } 432 else if ("Collection".equals(attrValue)) 433 { 434 rdf_ParseTypeCollectionPropertyElement(); 435 } 436 else 437 { 438 rdf_ParseTypeOtherPropertyElement(); 439 } 440 441 return; 442 } 443 } 444 445 // Only rdf:ID and xml:lang, could be a resourcePropertyElt, a literalPropertyElt, 446 // or an emptyPropertyElt. Look at the child XML nodes to decide which. 447 448 if (xmlNode.hasChildNodes()) 449 { 450 for (int i = 0; i < xmlNode.getChildNodes().getLength(); i++) 451 { 452 Node currChild = xmlNode.getChildNodes().item(i); 453 if (currChild.getNodeType() != Node.TEXT_NODE) 454 { 455 rdf_ResourcePropertyElement (xmp, xmpParent, xmlNode, isTopLevel); 456 return; 457 } 458 } 459 460 rdf_LiteralPropertyElement (xmp, xmpParent, xmlNode, isTopLevel); 461 } 462 else 463 { 464 rdf_EmptyPropertyElement (xmp, xmpParent, xmlNode, isTopLevel); 465 } 466 } 467 } 468 469 470 /** 471 * 7.2.15 resourcePropertyElt 472 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) ) 473 * ws* nodeElement ws* 474 * end-element() 475 * 476 * This handles structs using an rdf:Description node, 477 * arrays using rdf:Bag/Seq/Alt, and typedNodes. It also catches and cleans up qualified 478 * properties written with rdf:Description and rdf:value. 479 * 480 * @param xmp the xmp metadata object that is generated 481 * @param xmpParent the parent xmp node 482 * @param xmlNode the currently processed XML node 483 * @param isTopLevel Flag if the node is a top-level node 484 * @throws XMPException thown on parsing errors 485 */ 486 private static void rdf_ResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, 487 Node xmlNode, boolean isTopLevel) throws XMPException 488 { 489 if (isTopLevel && "iX:changes".equals(xmlNode.getNodeName())) 490 { 491 // Strip old "punchcard" chaff which has on the prefix "iX:". 492 return; 493 } 494 495 XMPNode newCompound = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel); 496 497 // walk through the attributes 498 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 499 { 500 Node attribute = xmlNode.getAttributes().item(i); 501 if ("xmlns".equals(attribute.getPrefix()) || 502 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 503 { 504 continue; 505 } 506 507 String attrLocal = attribute.getLocalName(); 508 String attrNS = attribute.getNamespaceURI(); 509 if (XML_LANG.equals(attribute.getNodeName())) 510 { 511 addQualifierNode (newCompound, XML_LANG, attribute.getNodeValue()); 512 } 513 else if ("ID".equals(attrLocal) && NS_RDF.equals(attrNS)) 514 { 515 continue; // Ignore all rdf:ID attributes. 516 } 517 else 518 { 519 throw new XMPException( 520 "Invalid attribute for resource property element", BADRDF); 521 } 522 } 523 524 // walk through the children 525 526 Node currChild = null; 527 boolean found = false; 528 int i; 529 for (i = 0; i < xmlNode.getChildNodes().getLength(); i++) 530 { 531 currChild = xmlNode.getChildNodes().item(i); 532 if (!isWhitespaceNode(currChild)) 533 { 534 if (currChild.getNodeType() == Node.ELEMENT_NODE && !found) 535 { 536 boolean isRDF = NS_RDF.equals(currChild.getNamespaceURI()); 537 String childLocal = currChild.getLocalName(); 538 539 if (isRDF && "Bag".equals(childLocal)) 540 { 541 newCompound.getOptions().setArray(true); 542 } 543 else if (isRDF && "Seq".equals(childLocal)) 544 { 545 newCompound.getOptions().setArray(true).setArrayOrdered(true); 546 } 547 else if (isRDF && "Alt".equals(childLocal)) 548 { 549 newCompound.getOptions().setArray(true).setArrayOrdered(true) 550 .setArrayAlternate(true); 551 } 552 else 553 { 554 newCompound.getOptions().setStruct(true); 555 if (!isRDF && !"Description".equals(childLocal)) 556 { 557 String typeName = currChild.getNamespaceURI(); 558 if (typeName == null) 559 { 560 throw new XMPException( 561 "All XML elements must be in a namespace", BADXMP); 562 } 563 typeName += ':' + childLocal; 564 addQualifierNode (newCompound, "rdf:type", typeName); 565 } 566 } 567 568 rdf_NodeElement (xmp, newCompound, currChild, false); 569 570 if (newCompound.getHasValueChild()) 571 { 572 fixupQualifiedNode (newCompound); 573 } 574 else if (newCompound.getOptions().isArrayAlternate()) 575 { 576 XMPNodeUtils.detectAltText(newCompound); 577 } 578 579 found = true; 580 } 581 else if (found) 582 { 583 // found second child element 584 throw new XMPException( 585 "Invalid child of resource property element", BADRDF); 586 } 587 else 588 { 589 throw new XMPException( 590 "Children of resource property element must be XML elements", BADRDF); 591 } 592 } 593 } 594 595 if (!found) 596 { 597 // didn't found any child elements 598 throw new XMPException("Missing child of resource property element", BADRDF); 599 } 600 } 601 602 603 /** 604 * 7.2.16 literalPropertyElt 605 * start-element ( URI == propertyElementURIs, 606 * attributes == set ( idAttr?, datatypeAttr?) ) 607 * text() 608 * end-element() 609 * 610 * Add a leaf node with the text value and qualifiers for the attributes. 611 * @param xmp the xmp metadata object that is generated 612 * @param xmpParent the parent xmp node 613 * @param xmlNode the currently processed XML node 614 * @param isTopLevel Flag if the node is a top-level node 615 * @throws XMPException thown on parsing errors 616 */ 617 private static void rdf_LiteralPropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, 618 Node xmlNode, boolean isTopLevel) throws XMPException 619 { 620 XMPNode newChild = addChildNode (xmp, xmpParent, xmlNode, null, isTopLevel); 621 622 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 623 { 624 Node attribute = xmlNode.getAttributes().item(i); 625 if ("xmlns".equals(attribute.getPrefix()) || 626 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 627 { 628 continue; 629 } 630 631 String attrNS = attribute.getNamespaceURI(); 632 String attrLocal = attribute.getLocalName(); 633 if (XML_LANG.equals(attribute.getNodeName())) 634 { 635 addQualifierNode(newChild, XML_LANG, attribute.getNodeValue()); 636 } 637 else if (NS_RDF.equals(attrNS) && 638 ("ID".equals(attrLocal) || "datatype".equals(attrLocal))) 639 { 640 continue; // Ignore all rdf:ID and rdf:datatype attributes. 641 } 642 else 643 { 644 throw new XMPException( 645 "Invalid attribute for literal property element", BADRDF); 646 } 647 } 648 String textValue = ""; 649 for (int i = 0; i < xmlNode.getChildNodes().getLength(); i++) 650 { 651 Node child = xmlNode.getChildNodes().item(i); 652 if (child.getNodeType() == Node.TEXT_NODE) 653 { 654 textValue += child.getNodeValue(); 655 } 656 else 657 { 658 throw new XMPException("Invalid child of literal property element", BADRDF); 659 } 660 } 661 newChild.setValue(textValue); 662 } 663 664 665 /** 666 * 7.2.17 parseTypeLiteralPropertyElt 667 * start-element ( URI == propertyElementURIs, 668 * attributes == set ( idAttr?, parseLiteral ) ) 669 * literal 670 * end-element() 671 * 672 * @throws XMPException thown on parsing errors 673 */ 674 private static void rdf_ParseTypeLiteralPropertyElement() throws XMPException 675 { 676 throw new XMPException("ParseTypeLiteral property element not allowed", BADXMP); 677 } 678 679 680 /** 681 * 7.2.18 parseTypeResourcePropertyElt 682 * start-element ( URI == propertyElementURIs, 683 * attributes == set ( idAttr?, parseResource ) ) 684 * propertyEltList 685 * end-element() 686 * 687 * Add a new struct node with a qualifier for the possible rdf:ID attribute. 688 * Then process the XML child nodes to get the struct fields. 689 * 690 * @param xmp the xmp metadata object that is generated 691 * @param xmpParent the parent xmp node 692 * @param xmlNode the currently processed XML node 693 * @param isTopLevel Flag if the node is a top-level node 694 * @throws XMPException thown on parsing errors 695 */ 696 private static void rdf_ParseTypeResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, 697 Node xmlNode, boolean isTopLevel) throws XMPException 698 { 699 XMPNode newStruct = addChildNode (xmp, xmpParent, xmlNode, "", isTopLevel); 700 701 newStruct.getOptions().setStruct(true); 702 703 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 704 { 705 Node attribute = xmlNode.getAttributes().item(i); 706 if ("xmlns".equals(attribute.getPrefix()) || 707 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 708 { 709 continue; 710 } 711 712 String attrLocal = attribute.getLocalName(); 713 String attrNS = attribute.getNamespaceURI(); 714 if (XML_LANG.equals(attribute.getNodeName())) 715 { 716 addQualifierNode (newStruct, XML_LANG, attribute.getNodeValue()); 717 } 718 else if (NS_RDF.equals(attrNS) && 719 ("ID".equals(attrLocal) || "parseType".equals(attrLocal))) 720 { 721 continue; // The caller ensured the value is "Resource". 722 // Ignore all rdf:ID attributes. 723 } 724 else 725 { 726 throw new XMPException("Invalid attribute for ParseTypeResource property element", 727 BADRDF); 728 } 729 } 730 731 rdf_PropertyElementList (xmp, newStruct, xmlNode, false); 732 733 if (newStruct.getHasValueChild()) 734 { 735 fixupQualifiedNode (newStruct); 736 } 737 } 738 739 740 /** 741 * 7.2.19 parseTypeCollectionPropertyElt 742 * start-element ( URI == propertyElementURIs, 743 * attributes == set ( idAttr?, parseCollection ) ) 744 * nodeElementList 745 * end-element() 746 * 747 * @throws XMPException thown on parsing errors 748 */ 749 private static void rdf_ParseTypeCollectionPropertyElement() throws XMPException 750 { 751 throw new XMPException("ParseTypeCollection property element not allowed", BADXMP); 752 } 753 754 755 /** 756 * 7.2.20 parseTypeOtherPropertyElt 757 * start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) ) 758 * propertyEltList 759 * end-element() 760 * 761 * @throws XMPException thown on parsing errors 762 */ 763 private static void rdf_ParseTypeOtherPropertyElement() throws XMPException 764 { 765 throw new XMPException("ParseTypeOther property element not allowed", BADXMP); 766 } 767 768 769 /** 770 * 7.2.21 emptyPropertyElt 771 * start-element ( URI == propertyElementURIs, 772 * attributes == set ( 773 * idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) ) 774 * end-element() 775 * 776 * <ns:Prop1/> <!-- a simple property with an empty value --> 777 * <ns:Prop2 rdf:resource="http: *www.adobe.com/"/> <!-- a URI value --> 778 * <ns:Prop3 rdf:value="..." ns:Qual="..."/> <!-- a simple qualified property --> 779 * <ns:Prop4 ns:Field1="..." ns:Field2="..."/> <!-- a struct with simple fields --> 780 * 781 * An emptyPropertyElt is an element with no contained content, just a possibly empty set of 782 * attributes. An emptyPropertyElt can represent three special cases of simple XMP properties: a 783 * simple property with an empty value (ns:Prop1), a simple property whose value is a URI 784 * (ns:Prop2), or a simple property with simple qualifiers (ns:Prop3). 785 * An emptyPropertyElt can also represent an XMP struct whose fields are all simple and 786 * unqualified (ns:Prop4). 787 * 788 * It is an error to use both rdf:value and rdf:resource - that can lead to invalid RDF in the 789 * verbose form written using a literalPropertyElt. 790 * 791 * The XMP mapping for an emptyPropertyElt is a bit different from generic RDF, partly for 792 * design reasons and partly for historical reasons. The XMP mapping rules are: 793 * <ol> 794 * <li> If there is an rdf:value attribute then this is a simple property 795 * with a text value. 796 * All other attributes are qualifiers. 797 * <li> If there is an rdf:resource attribute then this is a simple property 798 * with a URI value. 799 * All other attributes are qualifiers. 800 * <li> If there are no attributes other than xml:lang, rdf:ID, or rdf:nodeID 801 * then this is a simple 802 * property with an empty value. 803 * <li> Otherwise this is a struct, the attributes other than xml:lang, rdf:ID, 804 * or rdf:nodeID are fields. 805 * </ol> 806 * 807 * @param xmp the xmp metadata object that is generated 808 * @param xmpParent the parent xmp node 809 * @param xmlNode the currently processed XML node 810 * @param isTopLevel Flag if the node is a top-level node 811 * @throws XMPException thown on parsing errors 812 */ 813 private static void rdf_EmptyPropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode, 814 boolean isTopLevel) throws XMPException 815 { 816 boolean hasPropertyAttrs = false; 817 boolean hasResourceAttr = false; 818 boolean hasNodeIDAttr = false; 819 boolean hasValueAttr = false; 820 821 Node valueNode = null; // ! Can come from rdf:value or rdf:resource. 822 823 if (xmlNode.hasChildNodes()) 824 { 825 throw new XMPException( 826 "Nested content not allowed with rdf:resource or property attributes", 827 BADRDF); 828 } 829 830 // First figure out what XMP this maps to and remember the XML node for a simple value. 831 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 832 { 833 Node attribute = xmlNode.getAttributes().item(i); 834 if ("xmlns".equals(attribute.getPrefix()) || 835 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 836 { 837 continue; 838 } 839 840 int attrTerm = getRDFTermKind (attribute); 841 842 switch (attrTerm) 843 { 844 case RDFTERM_ID : 845 // Nothing to do. 846 break; 847 848 case RDFTERM_RESOURCE : 849 if (hasNodeIDAttr) 850 { 851 throw new XMPException( 852 "Empty property element can't have both rdf:resource and rdf:nodeID", 853 BADRDF); 854 } 855 else if (hasValueAttr) 856 { 857 throw new XMPException( 858 "Empty property element can't have both rdf:value and rdf:resource", 859 BADXMP); 860 } 861 862 hasResourceAttr = true; 863 if (!hasValueAttr) 864 { 865 valueNode = attribute; 866 } 867 break; 868 869 case RDFTERM_NODE_ID: 870 if (hasResourceAttr) 871 { 872 throw new XMPException( 873 "Empty property element can't have both rdf:resource and rdf:nodeID", 874 BADRDF); 875 } 876 hasNodeIDAttr = true; 877 break; 878 879 case RDFTERM_OTHER: 880 if ("value".equals(attribute.getLocalName()) 881 && NS_RDF.equals(attribute.getNamespaceURI())) 882 { 883 if (hasResourceAttr) 884 { 885 throw new XMPException( 886 "Empty property element can't have both rdf:value and rdf:resource", 887 BADXMP); 888 } 889 hasValueAttr = true; 890 valueNode = attribute; 891 } 892 else if (!XML_LANG.equals(attribute.getNodeName())) 893 { 894 hasPropertyAttrs = true; 895 } 896 break; 897 898 default: 899 throw new XMPException("Unrecognized attribute of empty property element", 900 BADRDF); 901 } 902 } 903 904 // Create the right kind of child node and visit the attributes again 905 // to add the fields or qualifiers. 906 // ! Because of implementation vagaries, 907 // the xmpParent is the tree root for top level properties. 908 // ! The schema is found, created if necessary, by addChildNode. 909 910 XMPNode childNode = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel); 911 boolean childIsStruct = false; 912 913 if (hasValueAttr || hasResourceAttr) 914 { 915 childNode.setValue(valueNode != null ? valueNode.getNodeValue() : ""); 916 if (!hasValueAttr) 917 { 918 // ! Might have both rdf:value and rdf:resource. 919 childNode.getOptions().setURI(true); 920 } 921 } 922 else if (hasPropertyAttrs) 923 { 924 childNode.getOptions().setStruct(true); 925 childIsStruct = true; 926 } 927 928 for (int i = 0; i < xmlNode.getAttributes().getLength(); i++) 929 { 930 Node attribute = xmlNode.getAttributes().item(i); 931 if (attribute == valueNode || 932 "xmlns".equals(attribute.getPrefix()) || 933 (attribute.getPrefix() == null && "xmlns".equals(attribute.getNodeName()))) 934 { 935 continue; // Skip the rdf:value or rdf:resource attribute holding the value. 936 } 937 938 int attrTerm = getRDFTermKind (attribute); 939 940 switch (attrTerm) 941 { 942 case RDFTERM_ID : 943 case RDFTERM_NODE_ID : 944 break; // Ignore all rdf:ID and rdf:nodeID attributes. 945 946 case RDFTERM_RESOURCE : 947 addQualifierNode(childNode, "rdf:resource", attribute.getNodeValue()); 948 break; 949 950 case RDFTERM_OTHER : 951 if (!childIsStruct) 952 { 953 addQualifierNode( 954 childNode, attribute.getNodeName(), attribute.getNodeValue()); 955 } 956 else if (XML_LANG.equals(attribute.getNodeName())) 957 { 958 addQualifierNode (childNode, XML_LANG, attribute.getNodeValue()); 959 } 960 else 961 { 962 addChildNode (xmp, childNode, attribute, attribute.getNodeValue(), false); 963 } 964 break; 965 966 default : 967 throw new XMPException("Unrecognized attribute of empty property element", 968 BADRDF); 969 } 970 971 } 972 } 973 974 975 /** 976 * Adds a child node. 977 * 978 * @param xmp the xmp metadata object that is generated 979 * @param xmpParent the parent xmp node 980 * @param xmlNode the currently processed XML node 981 * @param value Node value 982 * @param isTopLevel Flag if the node is a top-level node 983 * @return Returns the newly created child node. 984 * @throws XMPException thown on parsing errors 985 */ 986 private static XMPNode addChildNode(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode, 987 String value, boolean isTopLevel) throws XMPException 988 { 989 XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry(); 990 String namespace = xmlNode.getNamespaceURI(); 991 String childName; 992 if (namespace != null) 993 { 994 if (NS_DC_DEPRECATED.equals(namespace)) 995 { 996 // Fix a legacy DC namespace 997 namespace = NS_DC; 998 } 999 1000 String prefix = registry.getNamespacePrefix(namespace); 1001 if (prefix == null) 1002 { 1003 prefix = xmlNode.getPrefix() != null ? xmlNode.getPrefix() : DEFAULT_PREFIX; 1004 prefix = registry.registerNamespace(namespace, prefix); 1005 } 1006 childName = prefix + xmlNode.getLocalName(); 1007 } 1008 else 1009 { 1010 throw new XMPException( 1011 "XML namespace required for all elements and attributes", BADRDF); 1012 } 1013 1014 1015 // create schema node if not already there 1016 PropertyOptions childOptions = new PropertyOptions(); 1017 boolean isAlias = false; 1018 if (isTopLevel) 1019 { 1020 // Lookup the schema node, adjust the XMP parent pointer. 1021 // Incoming parent must be the tree root. 1022 XMPNode schemaNode = XMPNodeUtils.findSchemaNode(xmp.getRoot(), namespace, 1023 DEFAULT_PREFIX, true); 1024 schemaNode.setImplicit(false); // Clear the implicit node bit. 1025 // need runtime check for proper 32 bit code. 1026 xmpParent = schemaNode; 1027 1028 // If this is an alias set the alias flag in the node 1029 // and the hasAliases flag in the tree. 1030 if (registry.findAlias(childName) != null) 1031 { 1032 isAlias = true; 1033 xmp.getRoot().setHasAliases(true); 1034 schemaNode.setHasAliases(true); 1035 } 1036 } 1037 1038 1039 // Make sure that this is not a duplicate of a named node. 1040 boolean isArrayItem = "rdf:li".equals(childName); 1041 boolean isValueNode = "rdf:value".equals(childName); 1042 1043 // Create XMP node and so some checks 1044 XMPNode newChild = new XMPNode( 1045 childName, value, childOptions); 1046 newChild.setAlias(isAlias); 1047 1048 // Add the new child to the XMP parent node, a value node first. 1049 if (!isValueNode) 1050 { 1051 xmpParent.addChild(newChild); 1052 } 1053 else 1054 { 1055 xmpParent.addChild(1, newChild); 1056 } 1057 1058 1059 if (isValueNode) 1060 { 1061 if (isTopLevel || !xmpParent.getOptions().isStruct()) 1062 { 1063 throw new XMPException("Misplaced rdf:value element", BADRDF); 1064 } 1065 xmpParent.setHasValueChild(true); 1066 } 1067 1068 if (isArrayItem) 1069 { 1070 if (!xmpParent.getOptions().isArray()) 1071 { 1072 throw new XMPException("Misplaced rdf:li element", BADRDF); 1073 } 1074 newChild.setName(ARRAY_ITEM_NAME); 1075 } 1076 1077 return newChild; 1078 } 1079 1080 1081 /** 1082 * Adds a qualifier node. 1083 * 1084 * @param xmpParent the parent xmp node 1085 * @param name the name of the qualifier which has to be 1086 * QName including the <b>default prefix</b> 1087 * @param value the value of the qualifier 1088 * @return Returns the newly created child node. 1089 * @throws XMPException thown on parsing errors 1090 */ 1091 private static XMPNode addQualifierNode(XMPNode xmpParent, String name, String value) 1092 throws XMPException 1093 { 1094 boolean isLang = XML_LANG.equals(name); 1095 1096 XMPNode newQual = null; 1097 1098 // normalize value of language qualifiers 1099 newQual = new XMPNode(name, isLang ? Utils.normalizeLangValue(value) : value, null); 1100 xmpParent.addQualifier(newQual); 1101 1102 return newQual; 1103 } 1104 1105 1106 /** 1107 * The parent is an RDF pseudo-struct containing an rdf:value field. Fix the 1108 * XMP data model. The rdf:value node must be the first child, the other 1109 * children are qualifiers. The form, value, and children of the rdf:value 1110 * node are the real ones. The rdf:value node's qualifiers must be added to 1111 * the others. 1112 * 1113 * @param xmpParent the parent xmp node 1114 * @throws XMPException thown on parsing errors 1115 */ 1116 private static void fixupQualifiedNode(XMPNode xmpParent) throws XMPException 1117 { 1118 assert xmpParent.getOptions().isStruct() && xmpParent.hasChildren(); 1119 1120 XMPNode valueNode = xmpParent.getChild(1); 1121 assert "rdf:value".equals(valueNode.getName()); 1122 1123 // Move the qualifiers on the value node to the parent. 1124 // Make sure an xml:lang qualifier stays at the front. 1125 // Check for duplicate names between the value node's qualifiers and the parent's children. 1126 // The parent's children are about to become qualifiers. Check here, between the groups. 1127 // Intra-group duplicates are caught by XMPNode#addChild(...). 1128 if (valueNode.getOptions().getHasLanguage()) 1129 { 1130 if (xmpParent.getOptions().getHasLanguage()) 1131 { 1132 throw new XMPException("Redundant xml:lang for rdf:value element", 1133 BADXMP); 1134 } 1135 XMPNode langQual = valueNode.getQualifier(1); 1136 valueNode.removeQualifier(langQual); 1137 xmpParent.addQualifier(langQual); 1138 } 1139 1140 // Start the remaining copy after the xml:lang qualifier. 1141 for (int i = 1; i <= valueNode.getQualifierLength(); i++) 1142 { 1143 XMPNode qualifier = valueNode.getQualifier(i); 1144 xmpParent.addQualifier(qualifier); 1145 } 1146 1147 1148 // Change the parent's other children into qualifiers. 1149 // This loop starts at 1, child 0 is the rdf:value node. 1150 for (int i = 2; i <= xmpParent.getChildrenLength(); i++) 1151 { 1152 XMPNode qualifier = xmpParent.getChild(i); 1153 xmpParent.addQualifier(qualifier); 1154 } 1155 1156 // Move the options and value last, other checks need the parent's original options. 1157 // Move the value node's children to be the parent's children. 1158 assert xmpParent.getOptions().isStruct() || xmpParent.getHasValueChild(); 1159 1160 xmpParent.setHasValueChild(false); 1161 xmpParent.getOptions().setStruct(false); 1162 xmpParent.getOptions().mergeWith(valueNode.getOptions()); 1163 xmpParent.setValue(valueNode.getValue()); 1164 1165 xmpParent.removeChildren(); 1166 for (Iterator it = valueNode.iterateChildren(); it.hasNext();) 1167 { 1168 XMPNode child = (XMPNode) it.next(); 1169 xmpParent.addChild(child); 1170 } 1171 } 1172 1173 1174 /** 1175 * Checks if the node is a white space. 1176 * @param node an XML-node 1177 * @return Returns whether the node is a whitespace node, 1178 * i.e. a text node that contains only whitespaces. 1179 */ 1180 private static boolean isWhitespaceNode(Node node) 1181 { 1182 if (node.getNodeType() != Node.TEXT_NODE) 1183 { 1184 return false; 1185 } 1186 1187 String value = node.getNodeValue(); 1188 for (int i = 0; i < value.length(); i++) 1189 { 1190 if (!Character.isWhitespace(value.charAt(i))) 1191 { 1192 return false; 1193 } 1194 } 1195 1196 return true; 1197 } 1198 1199 1200 /** 1201 * 7.2.6 propertyElementURIs 1202 * anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms ) 1203 * 1204 * @param term the term id 1205 * @return Return true if the term is a property element name. 1206 */ 1207 private static boolean isPropertyElementName(int term) 1208 { 1209 if (term == RDFTERM_DESCRIPTION || isOldTerm(term)) 1210 { 1211 return false; 1212 } 1213 else 1214 { 1215 return (!isCoreSyntaxTerm(term)); 1216 } 1217 } 1218 1219 1220 /** 1221 * 7.2.4 oldTerms<br> 1222 * rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID 1223 * 1224 * @param term the term id 1225 * @return Returns true if the term is an old term. 1226 */ 1227 private static boolean isOldTerm(int term) 1228 { 1229 return RDFTERM_FIRST_OLD <= term && term <= RDFTERM_LAST_OLD; 1230 } 1231 1232 1233 /** 1234 * 7.2.2 coreSyntaxTerms<br> 1235 * rdf:RDF | rdf:ID | rdf:about | rdf:parseType | rdf:resource | rdf:nodeID | 1236 * rdf:datatype 1237 * 1238 * @param term the term id 1239 * @return Return true if the term is a core syntax term 1240 */ 1241 private static boolean isCoreSyntaxTerm(int term) 1242 { 1243 return RDFTERM_FIRST_CORE <= term && term <= RDFTERM_LAST_CORE; 1244 } 1245 1246 1247 /** 1248 * Determines the ID for a certain RDF Term. 1249 * Arranged to hopefully minimize the parse time for large XMP. 1250 * 1251 * @param node an XML node 1252 * @return Returns the term ID. 1253 */ 1254 private static int getRDFTermKind(Node node) 1255 { 1256 String localName = node.getLocalName(); 1257 String namespace = node.getNamespaceURI(); 1258 1259 if ( 1260 namespace == null && 1261 ("about".equals(localName) || "ID".equals(localName)) && 1262 (node instanceof Attr) && 1263 NS_RDF.equals(((Attr) node).getOwnerElement().getNamespaceURI()) 1264 ) 1265 { 1266 namespace = NS_RDF; 1267 } 1268 1269 if (NS_RDF.equals(namespace)) 1270 { 1271 if ("li".equals(localName)) 1272 { 1273 return RDFTERM_LI; 1274 } 1275 else if ("parseType".equals(localName)) 1276 { 1277 return RDFTERM_PARSE_TYPE; 1278 } 1279 else if ("Description".equals(localName)) 1280 { 1281 return RDFTERM_DESCRIPTION; 1282 } 1283 else if ("about".equals(localName)) 1284 { 1285 return RDFTERM_ABOUT; 1286 } 1287 else if ("resource".equals(localName)) 1288 { 1289 return RDFTERM_RESOURCE; 1290 } 1291 else if ("RDF".equals(localName)) 1292 { 1293 return RDFTERM_RDF; 1294 } 1295 else if ("ID".equals(localName)) 1296 { 1297 return RDFTERM_ID; 1298 } 1299 else if ("nodeID".equals(localName)) 1300 { 1301 return RDFTERM_NODE_ID; 1302 } 1303 else if ("datatype".equals(localName)) 1304 { 1305 return RDFTERM_DATATYPE; 1306 } 1307 else if ("aboutEach".equals(localName)) 1308 { 1309 return RDFTERM_ABOUT_EACH; 1310 } 1311 else if ("aboutEachPrefix".equals(localName)) 1312 { 1313 return RDFTERM_ABOUT_EACH_PREFIX; 1314 } 1315 else if ("bagID".equals(localName)) 1316 { 1317 return RDFTERM_BAG_ID; 1318 } 1319 } 1320 1321 return RDFTERM_OTHER; 1322 } 1323 }