1 // ================================================================================================= 2 // ADOBE SYSTEMS INCORPORATED 3 // Copyright 2006 Adobe Systems Incorporated 4 // All Rights Reserved 5 // 6 // NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms 7 // of the Adobe license agreement accompanying it. 8 // ================================================================================================= 9 10 package com.adobe.xmp.impl.xpath; 11 12 import com.adobe.xmp.XMPError; 13 import com.adobe.xmp.XMPException; 14 import com.adobe.xmp.XMPMetaFactory; 15 import com.adobe.xmp.impl.Utils; 16 import com.adobe.xmp.properties.XMPAliasInfo; 17 18 19 /** 20 * Parser for XMP XPaths. 21 * 22 * @since 01.03.2006 23 */ 24 public final class XMPPathParser 25 { 26 /** 27 * Private constructor 28 */ 29 private XMPPathParser() 30 { 31 // empty 32 } 33 34 35 /** 36 * Split an XMPPath expression apart at the conceptual steps, adding the 37 * root namespace prefix to the first property component. The schema URI is 38 * put in the first (0th) slot in the expanded XMPPath. Check if the top 39 * level component is an alias, but don't resolve it. 40 * <p> 41 * In the most verbose case steps are separated by '/', and each step can be 42 * of these forms: 43 * <dl> 44 * <dt>prefix:name 45 * <dd> A top level property or struct field. 46 * <dt>[index] 47 * <dd> An element of an array. 48 * <dt>[last()] 49 * <dd> The last element of an array. 50 * <dt>[fieldName="value"] 51 * <dd> An element in an array of structs, chosen by a field value. 52 * <dt>[@xml:lang="value"] 53 * <dd> An element in an alt-text array, chosen by the xml:lang qualifier. 54 * <dt>[?qualName="value"] 55 * <dd> An element in an array, chosen by a qualifier value. 56 * <dt>@xml:lang 57 * <dd> An xml:lang qualifier. 58 * <dt>?qualName 59 * <dd> A general qualifier. 60 * </dl> 61 * <p> 62 * The logic is complicated though by shorthand for arrays, the separating 63 * '/' and leading '*' are optional. These are all equivalent: array/*[2] 64 * array/[2] array*[2] array[2] All of these are broken into the 2 steps 65 * "array" and "[2]". 66 * <p> 67 * The value portion in the array selector forms is a string quoted by ''' 68 * or '"'. The value may contain any character including a doubled quoting 69 * character. The value may be empty. 70 * <p> 71 * The syntax isn't checked, but an XML name begins with a letter or '_', 72 * and contains letters, digits, '.', '-', '_', and a bunch of special 73 * non-ASCII Unicode characters. An XML qualified name is a pair of names 74 * separated by a colon. 75 * @param schemaNS 76 * schema namespace 77 * @param path 78 * property name 79 * @return Returns the expandet XMPPath. 80 * @throws XMPException 81 * Thrown if the format is not correct somehow. 82 * 83 */ 84 public static XMPPath expandXPath(String schemaNS, String path) throws XMPException 85 { 86 if (schemaNS == null || path == null) 87 { 88 throw new XMPException("Parameter must not be null", XMPError.BADPARAM); 89 } 90 91 XMPPath expandedXPath = new XMPPath(); 92 PathPosition pos = new PathPosition(); 93 pos.path = path; 94 95 // Pull out the first component and do some special processing on it: add the schema 96 // namespace prefix and and see if it is an alias. The start must be a "qualName". 97 parseRootNode(schemaNS, pos, expandedXPath); 98 99 // Now continue to process the rest of the XMPPath string. 100 while (pos.stepEnd < path.length()) 101 { 102 pos.stepBegin = pos.stepEnd; 103 104 skipPathDelimiter(path, pos); 105 106 pos.stepEnd = pos.stepBegin; 107 108 109 XMPPathSegment segment; 110 if (path.charAt(pos.stepBegin) != '[') 111 { 112 // A struct field or qualifier. 113 segment = parseStructSegment(pos); 114 } 115 else 116 { 117 // One of the array forms. 118 segment = parseIndexSegment(pos); 119 } 120 121 122 if (segment.getKind() == XMPPath.STRUCT_FIELD_STEP) 123 { 124 if (segment.getName().charAt(0) == '@') 125 { 126 segment.setName("?" + segment.getName().substring(1)); 127 if (!"?xml:lang".equals(segment.getName())) 128 { 129 throw new XMPException("Only xml:lang allowed with '@'", 130 XMPError.BADXPATH); 131 } 132 } 133 if (segment.getName().charAt(0) == '?') 134 { 135 pos.nameStart++; 136 segment.setKind(XMPPath.QUALIFIER_STEP); 137 } 138 139 verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd)); 140 } 141 else if (segment.getKind() == XMPPath.FIELD_SELECTOR_STEP) 142 { 143 if (segment.getName().charAt(1) == '@') 144 { 145 segment.setName("[?" + segment.getName().substring(2)); 146 if (!segment.getName().startsWith("[?xml:lang=")) 147 { 148 throw new XMPException("Only xml:lang allowed with '@'", 149 XMPError.BADXPATH); 150 } 151 } 152 153 if (segment.getName().charAt(1) == '?') 154 { 155 pos.nameStart++; 156 segment.setKind(XMPPath.QUAL_SELECTOR_STEP); 157 verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd)); 158 } 159 } 160 161 expandedXPath.add(segment); 162 } 163 return expandedXPath; 164 } 165 166 167 /** 168 * @param path 169 * @param pos 170 * @throws XMPException 171 */ 172 private static void skipPathDelimiter(String path, PathPosition pos) throws XMPException 173 { 174 if (path.charAt(pos.stepBegin) == '/') 175 { 176 // skip slash 177 178 pos.stepBegin++; 179 180 // added for Java 181 if (pos.stepBegin >= path.length()) 182 { 183 throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH); 184 } 185 } 186 187 if (path.charAt(pos.stepBegin) == '*') 188 { 189 // skip asterisk 190 191 pos.stepBegin++; 192 if (pos.stepBegin >= path.length() || path.charAt(pos.stepBegin) != '[') 193 { 194 throw new XMPException("Missing '[' after '*'", XMPError.BADXPATH); 195 } 196 } 197 } 198 199 200 /** 201 * Parses a struct segment 202 * @param pos the current position in the path 203 * @return Retusn the segment or an errror 204 * @throws XMPException If the sement is empty 205 */ 206 private static XMPPathSegment parseStructSegment(PathPosition pos) throws XMPException 207 { 208 pos.nameStart = pos.stepBegin; 209 while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0) 210 { 211 pos.stepEnd++; 212 } 213 pos.nameEnd = pos.stepEnd; 214 215 if (pos.stepEnd == pos.stepBegin) 216 { 217 throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH); 218 } 219 220 // ! Touch up later, also changing '@' to '?'. 221 XMPPathSegment segment = new XMPPathSegment(pos.path.substring(pos.stepBegin, pos.stepEnd), 222 XMPPath.STRUCT_FIELD_STEP); 223 return segment; 224 } 225 226 227 /** 228 * Parses an array index segment. 229 * 230 * @param pos the xmp path 231 * @return Returns the segment or an error 232 * @throws XMPException thrown on xmp path errors 233 * 234 */ 235 private static XMPPathSegment parseIndexSegment(PathPosition pos) throws XMPException 236 { 237 XMPPathSegment segment; 238 pos.stepEnd++; // Look at the character after the leading '['. 239 240 if ('0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9') 241 { 242 // A numeric (decimal integer) array index. 243 while (pos.stepEnd < pos.path.length() && '0' <= pos.path.charAt(pos.stepEnd) 244 && pos.path.charAt(pos.stepEnd) <= '9') 245 { 246 pos.stepEnd++; 247 } 248 249 segment = new XMPPathSegment(null, XMPPath.ARRAY_INDEX_STEP); 250 } 251 else 252 { 253 // Could be "[last()]" or one of the selector forms. Find the ']' or '='. 254 255 while (pos.stepEnd < pos.path.length() && pos.path.charAt(pos.stepEnd) != ']' 256 && pos.path.charAt(pos.stepEnd) != '=') 257 { 258 pos.stepEnd++; 259 } 260 261 if (pos.stepEnd >= pos.path.length()) 262 { 263 throw new XMPException("Missing ']' or '=' for array index", XMPError.BADXPATH); 264 } 265 266 if (pos.path.charAt(pos.stepEnd) == ']') 267 { 268 if (!"[last()".equals(pos.path.substring(pos.stepBegin, pos.stepEnd))) 269 { 270 throw new XMPException( 271 "Invalid non-numeric array index", XMPError.BADXPATH); 272 } 273 segment = new XMPPathSegment(null, XMPPath.ARRAY_LAST_STEP); 274 } 275 else 276 { 277 pos.nameStart = pos.stepBegin + 1; 278 pos.nameEnd = pos.stepEnd; 279 pos.stepEnd++; // Absorb the '=', remember the quote. 280 char quote = pos.path.charAt(pos.stepEnd); 281 if (quote != '\'' && quote != '"') 282 { 283 throw new XMPException( 284 "Invalid quote in array selector", XMPError.BADXPATH); 285 } 286 287 pos.stepEnd++; // Absorb the leading quote. 288 while (pos.stepEnd < pos.path.length()) 289 { 290 if (pos.path.charAt(pos.stepEnd) == quote) 291 { 292 // check for escaped quote 293 if (pos.stepEnd + 1 >= pos.path.length() 294 || pos.path.charAt(pos.stepEnd + 1) != quote) 295 { 296 break; 297 } 298 pos.stepEnd++; 299 } 300 pos.stepEnd++; 301 } 302 303 if (pos.stepEnd >= pos.path.length()) 304 { 305 throw new XMPException("No terminating quote for array selector", 306 XMPError.BADXPATH); 307 } 308 pos.stepEnd++; // Absorb the trailing quote. 309 310 // ! Touch up later, also changing '@' to '?'. 311 segment = new XMPPathSegment(null, XMPPath.FIELD_SELECTOR_STEP); 312 } 313 } 314 315 316 if (pos.stepEnd >= pos.path.length() || pos.path.charAt(pos.stepEnd) != ']') 317 { 318 throw new XMPException("Missing ']' for array index", XMPError.BADXPATH); 319 } 320 pos.stepEnd++; 321 segment.setName(pos.path.substring(pos.stepBegin, pos.stepEnd)); 322 323 return segment; 324 } 325 326 327 /** 328 * Parses the root node of an XMP Path, checks if namespace and prefix fit together 329 * and resolve the property to the base property if it is an alias. 330 * @param schemaNS the root namespace 331 * @param pos the parsing position helper 332 * @param expandedXPath the path to contribute to 333 * @throws XMPException If the path is not valid. 334 */ 335 private static void parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath) 336 throws XMPException 337 { 338 while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0) 339 { 340 pos.stepEnd++; 341 } 342 343 if (pos.stepEnd == pos.stepBegin) 344 { 345 throw new XMPException("Empty initial XMPPath step", XMPError.BADXPATH); 346 } 347 348 String rootProp = verifyXPathRoot(schemaNS, pos.path.substring(pos.stepBegin, pos.stepEnd)); 349 XMPAliasInfo aliasInfo = XMPMetaFactory.getSchemaRegistry().findAlias(rootProp); 350 if (aliasInfo == null) 351 { 352 // add schema xpath step 353 expandedXPath.add(new XMPPathSegment(schemaNS, XMPPath.SCHEMA_NODE)); 354 XMPPathSegment rootStep = new XMPPathSegment(rootProp, XMPPath.STRUCT_FIELD_STEP); 355 expandedXPath.add(rootStep); 356 } 357 else 358 { 359 // add schema xpath step and base step of alias 360 expandedXPath.add(new XMPPathSegment(aliasInfo.getNamespace(), XMPPath.SCHEMA_NODE)); 361 XMPPathSegment rootStep = new XMPPathSegment(verifyXPathRoot(aliasInfo.getNamespace(), 362 aliasInfo.getPropName()), 363 XMPPath.STRUCT_FIELD_STEP); 364 rootStep.setAlias(true); 365 rootStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); 366 expandedXPath.add(rootStep); 367 368 if (aliasInfo.getAliasForm().isArrayAltText()) 369 { 370 XMPPathSegment qualSelectorStep = new XMPPathSegment("[?xml:lang='x-default']", 371 XMPPath.QUAL_SELECTOR_STEP); 372 qualSelectorStep.setAlias(true); 373 qualSelectorStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); 374 expandedXPath.add(qualSelectorStep); 375 } 376 else if (aliasInfo.getAliasForm().isArray()) 377 { 378 XMPPathSegment indexStep = new XMPPathSegment("[1]", 379 XMPPath.ARRAY_INDEX_STEP); 380 indexStep.setAlias(true); 381 indexStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); 382 expandedXPath.add(indexStep); 383 } 384 } 385 } 386 387 388 /** 389 * Verifies whether the qualifier name is not XML conformant or the 390 * namespace prefix has not been registered. 391 * 392 * @param qualName 393 * a qualifier name 394 * @throws XMPException 395 * If the name is not conformant 396 */ 397 private static void verifyQualName(String qualName) throws XMPException 398 { 399 int colonPos = qualName.indexOf(':'); 400 if (colonPos > 0) 401 { 402 String prefix = qualName.substring(0, colonPos); 403 if (Utils.isXMLNameNS(prefix)) 404 { 405 String regURI = XMPMetaFactory.getSchemaRegistry().getNamespaceURI( 406 prefix); 407 if (regURI != null) 408 { 409 return; 410 } 411 412 throw new XMPException("Unknown namespace prefix for qualified name", 413 XMPError.BADXPATH); 414 } 415 } 416 417 throw new XMPException("Ill-formed qualified name", XMPError.BADXPATH); 418 } 419 420 421 /** 422 * Verify if an XML name is conformant. 423 * 424 * @param name 425 * an XML name 426 * @throws XMPException 427 * When the name is not XML conformant 428 */ 429 private static void verifySimpleXMLName(String name) throws XMPException 430 { 431 if (!Utils.isXMLName(name)) 432 { 433 throw new XMPException("Bad XML name", XMPError.BADXPATH); 434 } 435 } 436 437 438 /** 439 * Set up the first 2 components of the expanded XMPPath. Normalizes the various cases of using 440 * the full schema URI and/or a qualified root property name. Returns true for normal 441 * processing. If allowUnknownSchemaNS is true and the schema namespace is not registered, false 442 * is returned. If allowUnknownSchemaNS is false and the schema namespace is not registered, an 443 * exception is thrown 444 * <P> 445 * (Should someday check the full syntax:) 446 * 447 * @param schemaNS schema namespace 448 * @param rootProp the root xpath segment 449 * @return Returns root QName. 450 * @throws XMPException Thrown if the format is not correct somehow. 451 */ 452 private static String verifyXPathRoot(String schemaNS, String rootProp) 453 throws XMPException 454 { 455 // Do some basic checks on the URI and name. Try to lookup the URI. See if the name is 456 // qualified. 457 458 if (schemaNS == null || schemaNS.length() == 0) 459 { 460 throw new XMPException( 461 "Schema namespace URI is required", XMPError.BADSCHEMA); 462 } 463 464 if ((rootProp.charAt(0) == '?') || (rootProp.charAt(0) == '@')) 465 { 466 throw new XMPException("Top level name must not be a qualifier", XMPError.BADXPATH); 467 } 468 469 if (rootProp.indexOf('/') >= 0 || rootProp.indexOf('[') >= 0) 470 { 471 throw new XMPException("Top level name must be simple", XMPError.BADXPATH); 472 } 473 474 String prefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS); 475 if (prefix == null) 476 { 477 throw new XMPException("Unregistered schema namespace URI", XMPError.BADSCHEMA); 478 } 479 480 // Verify the various URI and prefix combinations. Initialize the 481 // expanded XMPPath. 482 int colonPos = rootProp.indexOf(':'); 483 if (colonPos < 0) 484 { 485 // The propName is unqualified, use the schemaURI and associated 486 // prefix. 487 verifySimpleXMLName(rootProp); // Verify the part before any colon 488 return prefix + rootProp; 489 } 490 else 491 { 492 // The propName is qualified. Make sure the prefix is legit. Use the associated URI and 493 // qualified name. 494 495 // Verify the part before any colon 496 verifySimpleXMLName(rootProp.substring(0, colonPos)); 497 verifySimpleXMLName(rootProp.substring(colonPos)); 498 499 prefix = rootProp.substring(0, colonPos + 1); 500 501 String regPrefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS); 502 if (regPrefix == null) 503 { 504 throw new XMPException("Unknown schema namespace prefix", XMPError.BADSCHEMA); 505 } 506 if (!prefix.equals(regPrefix)) 507 { 508 throw new XMPException("Schema namespace URI and prefix mismatch", 509 XMPError.BADSCHEMA); 510 } 511 512 return rootProp; 513 } 514 } 515 } 516 517 518 519 520 521 /** 522 * This objects contains all needed char positions to parse. 523 */ 524 class PathPosition 525 { 526 /** the complete path */ 527 public String path = null; 528 /** the start of a segment name */ 529 int nameStart = 0; 530 /** the end of a segment name */ 531 int nameEnd = 0; 532 /** the begin of a step */ 533 int stepBegin = 0; 534 /** the end of a step */ 535 int stepEnd = 0; 536 } 537 538