Home | History | Annotate | Download | only in xpath
      1 // =================================================================================================
      2 // ADOBE SYSTEMS INCORPORATED
      3 // Copyright 2006 Adobe Systems Incorporated
      4 // All Rights Reserved
      5 //
      6 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
      7 // of the Adobe license agreement accompanying it.
      8 // =================================================================================================
      9 
     10 package com.adobe.xmp.impl.xpath;
     11 
     12 import com.adobe.xmp.XMPError;
     13 import com.adobe.xmp.XMPException;
     14 import com.adobe.xmp.XMPMetaFactory;
     15 import com.adobe.xmp.impl.Utils;
     16 import com.adobe.xmp.properties.XMPAliasInfo;
     17 
     18 
     19 /**
     20  * Parser for XMP XPaths.
     21  *
     22  * @since   01.03.2006
     23  */
     24 public final class XMPPathParser
     25 {
     26 	/**
     27 	 * Private constructor
     28 	 */
     29 	private XMPPathParser()
     30 	{
     31 		// empty
     32 	}
     33 
     34 
     35 	/**
     36 	 * Split an XMPPath expression apart at the conceptual steps, adding the
     37 	 * root namespace prefix to the first property component. The schema URI is
     38 	 * put in the first (0th) slot in the expanded XMPPath. Check if the top
     39 	 * level component is an alias, but don't resolve it.
     40 	 * <p>
     41 	 * In the most verbose case steps are separated by '/', and each step can be
     42 	 * of these forms:
     43 	 * <dl>
     44 	 * <dt>prefix:name
     45 	 * <dd> A top level property or struct field.
     46 	 * <dt>[index]
     47 	 * <dd> An element of an array.
     48 	 * <dt>[last()]
     49 	 * <dd> The last element of an array.
     50 	 * <dt>[fieldName=&quot;value&quot;]
     51 	 * <dd> An element in an array of structs, chosen by a field value.
     52 	 * <dt>[@xml:lang=&quot;value&quot;]
     53 	 * <dd> An element in an alt-text array, chosen by the xml:lang qualifier.
     54 	 * <dt>[?qualName=&quot;value&quot;]
     55 	 * <dd> An element in an array, chosen by a qualifier value.
     56 	 * <dt>@xml:lang
     57 	 * <dd> An xml:lang qualifier.
     58 	 * <dt>?qualName
     59 	 * <dd> A general qualifier.
     60 	 * </dl>
     61 	 * <p>
     62 	 * The logic is complicated though by shorthand for arrays, the separating
     63 	 * '/' and leading '*' are optional. These are all equivalent: array/*[2]
     64 	 * array/[2] array*[2] array[2] All of these are broken into the 2 steps
     65 	 * "array" and "[2]".
     66 	 * <p>
     67 	 * The value portion in the array selector forms is a string quoted by '''
     68 	 * or '"'. The value may contain any character including a doubled quoting
     69 	 * character. The value may be empty.
     70 	 * <p>
     71 	 * The syntax isn't checked, but an XML name begins with a letter or '_',
     72 	 * and contains letters, digits, '.', '-', '_', and a bunch of special
     73 	 * non-ASCII Unicode characters. An XML qualified name is a pair of names
     74 	 * separated by a colon.
     75 	 * @param schemaNS
     76 	 *            schema namespace
     77 	 * @param path
     78 	 *            property name
     79 	 * @return Returns the expandet XMPPath.
     80 	 * @throws XMPException
     81 	 *             Thrown if the format is not correct somehow.
     82 	 *
     83 	 */
     84 	public static XMPPath expandXPath(String schemaNS, String path) throws XMPException
     85 	{
     86 		if (schemaNS == null  ||  path == null)
     87 		{
     88 			throw new XMPException("Parameter must not be null", XMPError.BADPARAM);
     89 		}
     90 
     91 		XMPPath expandedXPath = new XMPPath();
     92 		PathPosition pos = new PathPosition();
     93 		pos.path = path;
     94 
     95 		// Pull out the first component and do some special processing on it: add the schema
     96 		// namespace prefix and and see if it is an alias. The start must be a "qualName".
     97 		parseRootNode(schemaNS, pos, expandedXPath);
     98 
     99 		// Now continue to process the rest of the XMPPath string.
    100 		while (pos.stepEnd < path.length())
    101 		{
    102 			pos.stepBegin = pos.stepEnd;
    103 
    104 			skipPathDelimiter(path, pos);
    105 
    106 			pos.stepEnd = pos.stepBegin;
    107 
    108 
    109 			XMPPathSegment segment;
    110 			if (path.charAt(pos.stepBegin) != '[')
    111 			{
    112 				// A struct field or qualifier.
    113 				segment = parseStructSegment(pos);
    114 			}
    115 			else
    116 			{
    117 				// One of the array forms.
    118 				segment = parseIndexSegment(pos);
    119 			}
    120 
    121 
    122 			if (segment.getKind() == XMPPath.STRUCT_FIELD_STEP)
    123 			{
    124 				if (segment.getName().charAt(0) == '@')
    125 				{
    126 					segment.setName("?" + segment.getName().substring(1));
    127 					if (!"?xml:lang".equals(segment.getName()))
    128 					{
    129 						throw new XMPException("Only xml:lang allowed with '@'",
    130 								XMPError.BADXPATH);
    131 					}
    132 				}
    133 				if (segment.getName().charAt(0) == '?')
    134 				{
    135 					pos.nameStart++;
    136 					segment.setKind(XMPPath.QUALIFIER_STEP);
    137 				}
    138 
    139 				verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
    140 			}
    141 			else if (segment.getKind() == XMPPath.FIELD_SELECTOR_STEP)
    142 			{
    143 				if (segment.getName().charAt(1) == '@')
    144 				{
    145 					segment.setName("[?" + segment.getName().substring(2));
    146 					if (!segment.getName().startsWith("[?xml:lang="))
    147 					{
    148 						throw new XMPException("Only xml:lang allowed with '@'",
    149 								XMPError.BADXPATH);
    150 					}
    151 				}
    152 
    153 				if (segment.getName().charAt(1) == '?')
    154 				{
    155 					pos.nameStart++;
    156 					segment.setKind(XMPPath.QUAL_SELECTOR_STEP);
    157 					verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
    158 				}
    159 			}
    160 
    161 			expandedXPath.add(segment);
    162 		}
    163 		return expandedXPath;
    164 	}
    165 
    166 
    167 	/**
    168 	 * @param path
    169 	 * @param pos
    170 	 * @throws XMPException
    171 	 */
    172 	private static void skipPathDelimiter(String path, PathPosition pos) throws XMPException
    173 	{
    174 		if (path.charAt(pos.stepBegin) == '/')
    175 		{
    176 			// skip slash
    177 
    178 			pos.stepBegin++;
    179 
    180 			// added for Java
    181 			if (pos.stepBegin >= path.length())
    182 			{
    183 				throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
    184 			}
    185 		}
    186 
    187 		if (path.charAt(pos.stepBegin) == '*')
    188 		{
    189 			// skip asterisk
    190 
    191 			pos.stepBegin++;
    192 			if (pos.stepBegin >= path.length() || path.charAt(pos.stepBegin) != '[')
    193 			{
    194 				throw new XMPException("Missing '[' after '*'", XMPError.BADXPATH);
    195 			}
    196 		}
    197 	}
    198 
    199 
    200 	/**
    201 	 * Parses a struct segment
    202 	 * @param pos the current position in the path
    203 	 * @return Retusn the segment or an errror
    204 	 * @throws XMPException If the sement is empty
    205 	 */
    206 	private static XMPPathSegment parseStructSegment(PathPosition pos) throws XMPException
    207 	{
    208 		pos.nameStart = pos.stepBegin;
    209 		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
    210 		{
    211 			pos.stepEnd++;
    212 		}
    213 		pos.nameEnd = pos.stepEnd;
    214 
    215 		if (pos.stepEnd == pos.stepBegin)
    216 		{
    217 			throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
    218 		}
    219 
    220 		// ! Touch up later, also changing '@' to '?'.
    221 		XMPPathSegment segment = new XMPPathSegment(pos.path.substring(pos.stepBegin, pos.stepEnd),
    222 				XMPPath.STRUCT_FIELD_STEP);
    223 		return segment;
    224 	}
    225 
    226 
    227 	/**
    228 	 * Parses an array index segment.
    229 	 *
    230 	 * @param pos the xmp path
    231 	 * @return Returns the segment or an error
    232 	 * @throws XMPException thrown on xmp path errors
    233 	 *
    234 	 */
    235 	private static XMPPathSegment parseIndexSegment(PathPosition pos) throws XMPException
    236 	{
    237 		XMPPathSegment segment;
    238 		pos.stepEnd++; // Look at the character after the leading '['.
    239 
    240 		if ('0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9')
    241 		{
    242 			// A numeric (decimal integer) array index.
    243 			while (pos.stepEnd < pos.path.length() && '0' <= pos.path.charAt(pos.stepEnd)
    244 					&& pos.path.charAt(pos.stepEnd) <= '9')
    245 			{
    246 				pos.stepEnd++;
    247 			}
    248 
    249 			segment = new XMPPathSegment(null, XMPPath.ARRAY_INDEX_STEP);
    250 		}
    251 		else
    252 		{
    253 			// Could be "[last()]" or one of the selector forms. Find the ']' or '='.
    254 
    255 			while (pos.stepEnd < pos.path.length() && pos.path.charAt(pos.stepEnd) != ']'
    256 					&& pos.path.charAt(pos.stepEnd) != '=')
    257 			{
    258 				pos.stepEnd++;
    259 			}
    260 
    261 			if (pos.stepEnd >= pos.path.length())
    262 			{
    263 				throw new XMPException("Missing ']' or '=' for array index", XMPError.BADXPATH);
    264 			}
    265 
    266 			if (pos.path.charAt(pos.stepEnd) == ']')
    267 			{
    268 				if (!"[last()".equals(pos.path.substring(pos.stepBegin, pos.stepEnd)))
    269 				{
    270 					throw new XMPException(
    271 						"Invalid non-numeric array index", XMPError.BADXPATH);
    272 				}
    273 				segment = new XMPPathSegment(null, XMPPath.ARRAY_LAST_STEP);
    274 			}
    275 			else
    276 			{
    277 				pos.nameStart = pos.stepBegin + 1;
    278 				pos.nameEnd = pos.stepEnd;
    279 				pos.stepEnd++; // Absorb the '=', remember the quote.
    280 				char quote = pos.path.charAt(pos.stepEnd);
    281 				if (quote != '\'' && quote != '"')
    282 				{
    283 					throw new XMPException(
    284 						"Invalid quote in array selector", XMPError.BADXPATH);
    285 				}
    286 
    287 				pos.stepEnd++; // Absorb the leading quote.
    288 				while (pos.stepEnd < pos.path.length())
    289 				{
    290 					if (pos.path.charAt(pos.stepEnd) == quote)
    291 					{
    292 						// check for escaped quote
    293 						if (pos.stepEnd + 1 >= pos.path.length()
    294 								|| pos.path.charAt(pos.stepEnd + 1) != quote)
    295 						{
    296 							break;
    297 						}
    298 						pos.stepEnd++;
    299 					}
    300 					pos.stepEnd++;
    301 				}
    302 
    303 				if (pos.stepEnd >= pos.path.length())
    304 				{
    305 					throw new XMPException("No terminating quote for array selector",
    306 							XMPError.BADXPATH);
    307 				}
    308 				pos.stepEnd++; // Absorb the trailing quote.
    309 
    310 				// ! Touch up later, also changing '@' to '?'.
    311 				segment = new XMPPathSegment(null, XMPPath.FIELD_SELECTOR_STEP);
    312 			}
    313 		}
    314 
    315 
    316 		if (pos.stepEnd >= pos.path.length() || pos.path.charAt(pos.stepEnd) != ']')
    317 		{
    318 			throw new XMPException("Missing ']' for array index", XMPError.BADXPATH);
    319 		}
    320 		pos.stepEnd++;
    321 		segment.setName(pos.path.substring(pos.stepBegin, pos.stepEnd));
    322 
    323 		return segment;
    324 	}
    325 
    326 
    327 	/**
    328 	 * Parses the root node of an XMP Path, checks if namespace and prefix fit together
    329 	 * and resolve the property to the base property if it is an alias.
    330 	 * @param schemaNS the root namespace
    331 	 * @param pos the parsing position helper
    332 	 * @param expandedXPath  the path to contribute to
    333 	 * @throws XMPException If the path is not valid.
    334 	 */
    335 	private static void parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath)
    336 			throws XMPException
    337 	{
    338 		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
    339 		{
    340 			pos.stepEnd++;
    341 		}
    342 
    343 		if (pos.stepEnd == pos.stepBegin)
    344 		{
    345 			throw new XMPException("Empty initial XMPPath step", XMPError.BADXPATH);
    346 		}
    347 
    348 		String rootProp = verifyXPathRoot(schemaNS, pos.path.substring(pos.stepBegin, pos.stepEnd));
    349 		XMPAliasInfo aliasInfo = XMPMetaFactory.getSchemaRegistry().findAlias(rootProp);
    350 		if (aliasInfo == null)
    351 		{
    352 			// add schema xpath step
    353 			expandedXPath.add(new XMPPathSegment(schemaNS, XMPPath.SCHEMA_NODE));
    354 			XMPPathSegment rootStep = new XMPPathSegment(rootProp, XMPPath.STRUCT_FIELD_STEP);
    355 			expandedXPath.add(rootStep);
    356 		}
    357 		else
    358 		{
    359 			// add schema xpath step and base step of alias
    360 			expandedXPath.add(new XMPPathSegment(aliasInfo.getNamespace(), XMPPath.SCHEMA_NODE));
    361 			XMPPathSegment rootStep = new XMPPathSegment(verifyXPathRoot(aliasInfo.getNamespace(),
    362 					aliasInfo.getPropName()),
    363 					XMPPath.STRUCT_FIELD_STEP);
    364 			rootStep.setAlias(true);
    365 			rootStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
    366 			expandedXPath.add(rootStep);
    367 
    368 			if (aliasInfo.getAliasForm().isArrayAltText())
    369 			{
    370 				XMPPathSegment qualSelectorStep = new XMPPathSegment("[?xml:lang='x-default']",
    371 						XMPPath.QUAL_SELECTOR_STEP);
    372 				qualSelectorStep.setAlias(true);
    373 				qualSelectorStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
    374 				expandedXPath.add(qualSelectorStep);
    375 			}
    376 			else if (aliasInfo.getAliasForm().isArray())
    377 			{
    378 				XMPPathSegment indexStep = new XMPPathSegment("[1]",
    379 					XMPPath.ARRAY_INDEX_STEP);
    380 				indexStep.setAlias(true);
    381 				indexStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
    382 				expandedXPath.add(indexStep);
    383 			}
    384 		}
    385 	}
    386 
    387 
    388 	/**
    389 	 * Verifies whether the qualifier name is not XML conformant or the
    390 	 * namespace prefix has not been registered.
    391 	 *
    392 	 * @param qualName
    393 	 *            a qualifier name
    394 	 * @throws XMPException
    395 	 *             If the name is not conformant
    396 	 */
    397 	private static void verifyQualName(String qualName) throws XMPException
    398 	{
    399 		int colonPos = qualName.indexOf(':');
    400 		if (colonPos > 0)
    401 		{
    402 			String prefix = qualName.substring(0, colonPos);
    403 			if (Utils.isXMLNameNS(prefix))
    404 			{
    405 				String regURI = XMPMetaFactory.getSchemaRegistry().getNamespaceURI(
    406 						prefix);
    407 				if (regURI != null)
    408 				{
    409 					return;
    410 				}
    411 
    412 				throw new XMPException("Unknown namespace prefix for qualified name",
    413 						XMPError.BADXPATH);
    414 			}
    415 		}
    416 
    417 		throw new XMPException("Ill-formed qualified name", XMPError.BADXPATH);
    418 	}
    419 
    420 
    421 	/**
    422 	 * Verify if an XML name is conformant.
    423 	 *
    424 	 * @param name
    425 	 *            an XML name
    426 	 * @throws XMPException
    427 	 *             When the name is not XML conformant
    428 	 */
    429 	private static void verifySimpleXMLName(String name) throws XMPException
    430 	{
    431 		if (!Utils.isXMLName(name))
    432 		{
    433 			throw new XMPException("Bad XML name", XMPError.BADXPATH);
    434 		}
    435 	}
    436 
    437 
    438 	/**
    439 	 * Set up the first 2 components of the expanded XMPPath. Normalizes the various cases of using
    440 	 * the full schema URI and/or a qualified root property name. Returns true for normal
    441 	 * processing. If allowUnknownSchemaNS is true and the schema namespace is not registered, false
    442 	 * is returned. If allowUnknownSchemaNS is false and the schema namespace is not registered, an
    443 	 * exception is thrown
    444 	 * <P>
    445 	 * (Should someday check the full syntax:)
    446 	 *
    447 	 * @param schemaNS schema namespace
    448 	 * @param rootProp the root xpath segment
    449 	 * @return Returns root QName.
    450 	 * @throws XMPException Thrown if the format is not correct somehow.
    451 	 */
    452 	private static String verifyXPathRoot(String schemaNS, String rootProp)
    453 		throws XMPException
    454 	{
    455 		// Do some basic checks on the URI and name. Try to lookup the URI. See if the name is
    456 		// qualified.
    457 
    458 		if (schemaNS == null || schemaNS.length() == 0)
    459 		{
    460 			throw new XMPException(
    461 				"Schema namespace URI is required", XMPError.BADSCHEMA);
    462 		}
    463 
    464 		if ((rootProp.charAt(0) == '?') || (rootProp.charAt(0) == '@'))
    465 		{
    466 			throw new XMPException("Top level name must not be a qualifier", XMPError.BADXPATH);
    467 		}
    468 
    469 		if (rootProp.indexOf('/') >= 0 || rootProp.indexOf('[') >= 0)
    470 		{
    471 			throw new XMPException("Top level name must be simple", XMPError.BADXPATH);
    472 		}
    473 
    474 		String prefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
    475 		if (prefix == null)
    476 		{
    477 			throw new XMPException("Unregistered schema namespace URI", XMPError.BADSCHEMA);
    478 		}
    479 
    480 		// Verify the various URI and prefix combinations. Initialize the
    481 		// expanded XMPPath.
    482 		int colonPos = rootProp.indexOf(':');
    483 		if (colonPos < 0)
    484 		{
    485 			// The propName is unqualified, use the schemaURI and associated
    486 			// prefix.
    487 			verifySimpleXMLName(rootProp); // Verify the part before any colon
    488 			return prefix + rootProp;
    489 		}
    490 		else
    491 		{
    492 			// The propName is qualified. Make sure the prefix is legit. Use the associated URI and
    493 			// qualified name.
    494 
    495 			// Verify the part before any colon
    496 			verifySimpleXMLName(rootProp.substring(0, colonPos));
    497 			verifySimpleXMLName(rootProp.substring(colonPos));
    498 
    499 			prefix = rootProp.substring(0, colonPos + 1);
    500 
    501 			String regPrefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
    502 			if (regPrefix == null)
    503 			{
    504 				throw new XMPException("Unknown schema namespace prefix", XMPError.BADSCHEMA);
    505 			}
    506 			if (!prefix.equals(regPrefix))
    507 			{
    508 				throw new XMPException("Schema namespace URI and prefix mismatch",
    509 						XMPError.BADSCHEMA);
    510 			}
    511 
    512 			return rootProp;
    513 		}
    514 	}
    515 }
    516 
    517 
    518 
    519 
    520 
    521 /**
    522  * This objects contains all needed char positions to parse.
    523  */
    524 class PathPosition
    525 {
    526 	/** the complete path */
    527 	public String path = null;
    528 	/** the start of a segment name */
    529 	int nameStart = 0;
    530 	/** the end of a segment name */
    531 	int nameEnd = 0;
    532 	/** the begin of a step */
    533 	int stepBegin = 0;
    534 	/** the end of a step */
    535 	int stepEnd = 0;
    536 }
    537 
    538