Home | History | Annotate | Download | only in impl
      1 // =================================================================================================
      2 // ADOBE SYSTEMS INCORPORATED
      3 // Copyright 2006 Adobe Systems Incorporated
      4 // All Rights Reserved
      5 //
      6 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
      7 // of the Adobe license agreement accompanying it.
      8 // =================================================================================================
      9 
     10 package com.adobe.xmp.impl;
     11 
     12 
     13 import com.adobe.xmp.XMPConst;
     14 
     15 
     16 /**
     17  * Utility functions for the XMPToolkit implementation.
     18  *
     19  * @since 06.06.2006
     20  */
     21 public class Utils implements XMPConst
     22 {
	/**
	 * Number of '-' delimiters expected in a UUID string;
	 * {@link #checkUUIDFormat(String)} compares the delimiters it finds against this value.
	 */
	public static final int UUID_SEGMENT_COUNT = 4;
	/**
	 * Expected total length of a UUID string: 32 characters plus one per delimiter,
	 * i.e. 36.
	 */
	public static final int UUID_LENGTH = 32 + UUID_SEGMENT_COUNT;
	/**
	 * Lookup table indexed by char code (0x00-0xFF), telling whether the char may start
	 * an XML name. Allocated and filled by {@link #initCharTables()}.
	 */
	private  static boolean[] xmlNameStartChars;
	/**
	 * Lookup table indexed by char code (0x00-0xFF), telling whether the char may appear
	 * inside an XML name. Allocated and filled by {@link #initCharTables()}.
	 */
	private static boolean[] xmlNameChars;
	/** Builds both char tables once, when the class is initialized. */
	static
	{
		initCharTables();
	}
     36 
     37 
	/**
	 * Private constructor; the class offers static members only and is not
	 * meant to be instantiated.
	 */
	private Utils()
	{
		// EMPTY
	}
     45 
     46 
     47 	/**
     48 	 * Normalize an xml:lang value so that comparisons are effectively case
     49 	 * insensitive as required by RFC 3066 (which superceeds RFC 1766). The
     50 	 * normalization rules:
     51 	 * <ul>
     52 	 * <li> The primary subtag is lower case, the suggested practice of ISO 639.
     53 	 * <li> All 2 letter secondary subtags are upper case, the suggested
     54 	 * practice of ISO 3166.
     55 	 * <li> All other subtags are lower case.
     56 	 * </ul>
     57 	 *
     58 	 * @param value
     59 	 *            raw value
     60 	 * @return Returns the normalized value.
     61 	 */
     62 	public static String normalizeLangValue(String value)
     63 	{
     64 		// don't normalize x-default
     65 		if (XMPConst.X_DEFAULT.equals(value))
     66 		{
     67 			return value;
     68 		}
     69 
     70 		int subTag = 1;
     71 		StringBuffer buffer = new StringBuffer();
     72 
     73 		for (int i = 0; i < value.length(); i++)
     74 		{
     75 			switch (value.charAt(i))
     76 			{
     77 			case '-':
     78 			case '_':
     79 				// move to next subtag and convert underscore to hyphen
     80 				buffer.append('-');
     81 				subTag++;
     82 				break;
     83 			case ' ':
     84 				// remove spaces
     85 				break;
     86 			default:
     87 				// convert second subtag to uppercase, all other to lowercase
     88 				if (subTag != 2)
     89 				{
     90 					buffer.append(Character.toLowerCase(value.charAt(i)));
     91 				}
     92 				else
     93 				{
     94 					buffer.append(Character.toUpperCase(value.charAt(i)));
     95 				}
     96 			}
     97 
     98 		}
     99 		return buffer.toString();
    100 	}
    101 
    102 
    103 	/**
    104 	 * Split the name and value parts for field and qualifier selectors:
    105 	 * <ul>
    106 	 * <li>[qualName="value"] - An element in an array of structs, chosen by a
    107 	 * field value.
    108 	 * <li>[?qualName="value"] - An element in an array, chosen by a qualifier
    109 	 * value.
    110 	 * </ul>
    111 	 * The value portion is a string quoted by ''' or '"'. The value may contain
    112 	 * any character including a doubled quoting character. The value may be
    113 	 * empty. <em>Note:</em> It is assumed that the expression is formal
    114 	 * correct
    115 	 *
    116 	 * @param selector
    117 	 *            the selector
    118 	 * @return Returns an array where the first entry contains the name and the
    119 	 *         second the value.
    120 	 */
    121 	static String[] splitNameAndValue(String selector)
    122 	{
    123 		// get the name
    124 		int eq = selector.indexOf('=');
    125 		int pos = 1;
    126 		if (selector.charAt(pos) == '?')
    127 		{
    128 			pos++;
    129 		}
    130 		String name = selector.substring(pos, eq);
    131 
    132 		// get the value
    133 		pos = eq + 1;
    134 		char quote = selector.charAt(pos);
    135 		pos++;
    136 		int end = selector.length() - 2; // quote and ]
    137 		StringBuffer value = new StringBuffer(end - eq);
    138 		while (pos < end)
    139 		{
    140 			value.append(selector.charAt(pos));
    141 			pos++;
    142 			if (selector.charAt(pos) == quote)
    143 			{
    144 				// skip one quote in value
    145 				pos++;
    146 			}
    147 		}
    148 		return new String[] { name, value.toString() };
    149 	}
    150 
    151 
    152 	/**
    153 	 *
    154 	 * @param schema
    155 	 *            a schema namespace
    156 	 * @param prop
    157 	 *            an XMP Property
    158 	 * @return Returns true if the property is defined as &quot;Internal
    159 	 *         Property&quot;, see XMP Specification.
    160 	 */
    161 	static boolean isInternalProperty(String schema, String prop)
    162 	{
    163 		boolean isInternal = false;
    164 
    165 		if (NS_DC.equals(schema))
    166 		{
    167 			if ("dc:format".equals(prop) || "dc:language".equals(prop))
    168 			{
    169 				isInternal = true;
    170 			}
    171 		}
    172 		else if (NS_XMP.equals(schema))
    173 		{
    174 			if ("xmp:BaseURL".equals(prop) || "xmp:CreatorTool".equals(prop)
    175 					|| "xmp:Format".equals(prop) || "xmp:Locale".equals(prop)
    176 					|| "xmp:MetadataDate".equals(prop) || "xmp:ModifyDate".equals(prop))
    177 			{
    178 				isInternal = true;
    179 			}
    180 		}
    181 		else if (NS_PDF.equals(schema))
    182 		{
    183 			if ("pdf:BaseURL".equals(prop) || "pdf:Creator".equals(prop)
    184 					|| "pdf:ModDate".equals(prop) || "pdf:PDFVersion".equals(prop)
    185 					|| "pdf:Producer".equals(prop))
    186 			{
    187 				isInternal = true;
    188 			}
    189 		}
    190 		else if (NS_TIFF.equals(schema))
    191 		{
    192 			isInternal = true;
    193 			if ("tiff:ImageDescription".equals(prop) || "tiff:Artist".equals(prop)
    194 					|| "tiff:Copyright".equals(prop))
    195 			{
    196 				isInternal = false;
    197 			}
    198 		}
    199 		else if (NS_EXIF.equals(schema))
    200 		{
    201 			isInternal = true;
    202 			if ("exif:UserComment".equals(prop))
    203 			{
    204 				isInternal = false;
    205 			}
    206 		}
    207 		else if (NS_EXIF_AUX.equals(schema))
    208 		{
    209 			isInternal = true;
    210 		}
    211 		else if (NS_PHOTOSHOP.equals(schema))
    212 		{
    213 			if ("photoshop:ICCProfile".equals(prop))
    214 			{
    215 				isInternal = true;
    216 			}
    217 		}
    218 		else if (NS_CAMERARAW.equals(schema))
    219 		{
    220 			if ("crs:Version".equals(prop) || "crs:RawFileName".equals(prop)
    221 					|| "crs:ToneCurveName".equals(prop))
    222 			{
    223 				isInternal = true;
    224 			}
    225 		}
    226 		else if (NS_ADOBESTOCKPHOTO.equals(schema))
    227 		{
    228 			isInternal = true;
    229 		}
    230 		else if (NS_XMP_MM.equals(schema))
    231 		{
    232 			isInternal = true;
    233 		}
    234 		else if (TYPE_TEXT.equals(schema))
    235 		{
    236 			isInternal = true;
    237 		}
    238 		else if (TYPE_PAGEDFILE.equals(schema))
    239 		{
    240 			isInternal = true;
    241 		}
    242 		else if (TYPE_GRAPHICS.equals(schema))
    243 		{
    244 			isInternal = true;
    245 		}
    246 		else if (TYPE_IMAGE.equals(schema))
    247 		{
    248 			isInternal = true;
    249 		}
    250 		else if (TYPE_FONT.equals(schema))
    251 		{
    252 			isInternal = true;
    253 		}
    254 
    255 		return isInternal;
    256 	}
    257 
    258 
    259 	/**
    260 	 * Check some requirements for an UUID:
    261 	 * <ul>
    262 	 * <li>Length of the UUID is 32</li>
    263 	 * <li>The Delimiter count is 4 and all the 4 delimiter are on their right
    264 	 * position (8,13,18,23)</li>
    265 	 * </ul>
    266 	 *
    267 	 *
    268 	 * @param uuid uuid to test
    269 	 * @return true - this is a well formed UUID, false - UUID has not the expected format
    270 	 */
    271 
    272 	static boolean checkUUIDFormat(String uuid)
    273 	{
    274 		boolean result = true;
    275 		int delimCnt = 0;
    276 		int delimPos = 0;
    277 
    278 		if (uuid == null)
    279 		{
    280 			return false;
    281 		}
    282 
    283 		for (delimPos = 0; delimPos < uuid.length(); delimPos++)
    284 		{
    285 			if (uuid.charAt(delimPos) == '-')
    286 			{
    287 				delimCnt++;
    288 				result = result  &&
    289 					(delimPos == 8 || delimPos == 13 || delimPos == 18 || delimPos == 23);
    290 			}
    291 		}
    292 
    293 		return result && UUID_SEGMENT_COUNT == delimCnt && UUID_LENGTH == delimPos;
    294 	}
    295 
    296 
    297 	/**
    298 	 * Simple check for valid XMLNames. Within ASCII range<br>
    299 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
    300 	 * are accepted, above all characters (which is not entirely
    301 	 * correct according to the XML Spec.
    302 	 *
    303 	 * @param name an XML Name
    304 	 * @return Return <code>true</code> if the name is correct.
    305 	 */
    306 	public static boolean isXMLName(String name)
    307 	{
    308 		if (name.length() > 0  &&  !isNameStartChar(name.charAt(0)))
    309 		{
    310 			return false;
    311 		}
    312 		for (int i = 1; i < name.length(); i++)
    313 		{
    314 			if (!isNameChar(name.charAt(i)))
    315 			{
    316 				return false;
    317 			}
    318 		}
    319 		return true;
    320 	}
    321 
    322 
    323 	/**
    324 	 * Checks if the value is a legal "unqualified" XML name, as
    325 	 * defined in the XML Namespaces proposed recommendation.
    326 	 * These are XML names, except that they must not contain a colon.
    327 	 * @param name the value to check
    328 	 * @return Returns true if the name is a valid "unqualified" XML name.
    329 	 */
    330 	public static boolean isXMLNameNS(String name)
    331 	{
    332 		if (name.length() > 0  &&  (!isNameStartChar(name.charAt(0))  ||  name.charAt(0) == ':'))
    333 		{
    334 			return false;
    335 		}
    336 		for (int i = 1; i < name.length(); i++)
    337 		{
    338 			if (!isNameChar(name.charAt(i))  ||  name.charAt(i) == ':')
    339 			{
    340 				return false;
    341 			}
    342 		}
    343 		return true;
    344 	}
    345 
    346 
    347 	/**
    348 	 * @param c  a char
    349 	 * @return Returns true if the char is an ASCII control char.
    350 	 */
    351 	static boolean isControlChar(char c)
    352 	{
    353 		return (c <= 0x1F  ||  c == 0x7F)  &&
    354 				c != 0x09  &&  c != 0x0A  &&  c != 0x0D;
    355 	}
    356 
    357 
    358 	/**
    359 	 * Serializes the node value in XML encoding. Its used for tag bodies and
    360 	 * attributes.<br>
    361 	 * <em>Note:</em> The attribute is always limited by quotes,
    362 	 * thats why <code>&amp;apos;</code> is never serialized.<br>
    363 	 * <em>Note:</em> Control chars are written unescaped, but if the user uses others than tab, LF
    364 	 * and CR the resulting XML will become invalid.
    365 	 * @param value a string
    366 	 * @param forAttribute flag if string is attribute value (need to additional escape quotes)
    367 	 * @param escapeWhitespaces Decides if LF, CR and TAB are escaped.
    368 	 * @return Returns the value ready for XML output.
    369 	 */
    370 	public static String escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces)
    371 	{
    372 		// quick check if character are contained that need special treatment
    373 		boolean needsEscaping = false;
    374 		for (int i = 0; i < value.length (); i++)
    375         {
    376             char c = value.charAt (i);
    377 			if (
    378 				 c == '<'  ||  c == '>'  ||  c == '&'  ||							    // XML chars
    379 				(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r'))  ||
    380 				(forAttribute  &&  c == '"'))
    381 			{
    382 				needsEscaping = true;
    383 				break;
    384 			}
    385         }
    386 
    387 		if (!needsEscaping)
    388 		{
    389 			// fast path
    390 			return value;
    391 		}
    392 		else
    393 		{
    394 			// slow path with escaping
    395 			StringBuffer buffer = new StringBuffer(value.length() * 4 / 3);
    396 	        for (int i = 0; i < value.length (); i++)
    397 	        {
    398 	            char c = value.charAt (i);
    399 	            if (!(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r')))
    400 	            {
    401 	            	switch (c)
    402 		            {
    403 	            		// we do what "Canonical XML" expects
    404 	            		// AUDIT: &apos; not serialized as only outer qoutes are used
    405 		              	case '<':	buffer.append("&lt;"); continue;
    406 		              	case '>':	buffer.append("&gt;"); continue;
    407 		              	case '&':	buffer.append("&amp;"); continue;
    408 		              	case '"': 	buffer.append(forAttribute ? "&quot;" : "\""); continue;
    409 		              	default:	buffer.append(c); continue;
    410 		            }
    411 		        }
    412 	            else
    413 	            {
    414 	            	// write control chars escaped,
    415 	            	// if there are others than tab, LF and CR the xml will become invalid.
    416 	            	buffer.append("&#x");
    417 	            	buffer.append(Integer.toHexString(c).toUpperCase());
    418 	            	buffer.append(';');
    419 	            }
    420 	        }
    421 	        return buffer.toString();
    422 		}
    423 	}
    424 
    425 
    426 	/**
    427 	 * Replaces the ASCII control chars with a space.
    428 	 *
    429 	 * @param value
    430 	 *            a node value
    431 	 * @return Returns the cleaned up value
    432 	 */
    433 	static String removeControlChars(String value)
    434 	{
    435 		StringBuffer buffer = new StringBuffer(value);
    436 		for (int i = 0; i < buffer.length(); i++)
    437 		{
    438 			if (isControlChar(buffer.charAt(i)))
    439 			{
    440 				buffer.setCharAt(i, ' ');
    441 			}
    442 		}
    443 		return buffer.toString();
    444 	}
    445 
    446 
    447 	/**
    448 	 * Simple check if a character is a valid XML start name char.
    449 	 * Within ASCII range<br>
    450 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
    451 	 * are accepted, above all characters (which is not entirely
    452 	 * correct according to the XML Spec)
    453 	 *
    454 	 * @param ch a character
    455 	 * @return Returns true if the character is a valid first char of an XML name.
    456 	 */
    457 	private static boolean isNameStartChar(char ch)
    458 	{
    459 		return ch > 0xFF  ||  xmlNameStartChars[ch];
    460 	}
    461 
    462 
    463 	/**
    464 	 * Simple check if a character is a valid XML name char
    465 	 * (every char except the first one).
    466 	 * Within ASCII range<br>
    467 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
    468 	 * are accepted, above all characters (which is not entirely
    469 	 * correct according to the XML Spec)
    470 	 *
    471 	 * @param ch a character
    472 	 * @return Returns true if the character is a valid char of an XML name.
    473 	 */
    474 	private static boolean isNameChar(char ch)
    475 	{
    476 		return ch > 0xFF  ||  xmlNameChars[ch];
    477 	}
    478 
    479 
    480 	/**
    481 	 * Initializes the char tables for later use.
    482 	 */
    483 	private static void initCharTables()
    484 	{
    485 		xmlNameChars = new boolean[0x0100];
    486 		xmlNameStartChars = new boolean[0x0100];
    487 
    488 		for (char ch = 0; ch < xmlNameChars.length; ch++)
    489 		{
    490 			xmlNameStartChars[ch] =
    491 				('a' <= ch  &&  ch <= 'z')  ||
    492 				('A' <= ch  &&  ch <= 'Z')  ||
    493 				ch == ':'  ||
    494 				ch == '_'  ||
    495 				(0xC0 <= ch  &&  ch <= 0xD6)  ||
    496 				(0xD8 <= ch  &&  ch <= 0xF6);
    497 
    498 			xmlNameChars[ch] =
    499 				('a' <= ch  &&  ch <= 'z')  ||
    500 				('A' <= ch  &&  ch <= 'Z')  ||
    501 				('0' <= ch  &&  ch <= '9')  ||
    502 				ch == ':'  ||
    503 				ch == '_'  ||
    504 				ch == '-'  ||
    505 				ch == '.'  ||
    506 				ch == 0xB7  ||
    507 				(0xC0 <= ch  &&  ch <= 0xD6)  ||
    508 				(0xD8 <= ch  &&  ch <= 0xF6);
    509 		}
    510 	}
    511 }