1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.harmony.xml; 18 19 import java.io.IOException; 20 import java.io.InputStream; 21 import java.io.Reader; 22 import java.net.URI; 23 import java.net.URL; 24 import java.net.URLConnection; 25 import java.util.logging.Level; 26 import java.util.logging.Logger; 27 import org.xml.sax.Attributes; 28 import org.xml.sax.ContentHandler; 29 import org.xml.sax.DTDHandler; 30 import org.xml.sax.EntityResolver; 31 import org.xml.sax.InputSource; 32 import org.xml.sax.Locator; 33 import org.xml.sax.SAXException; 34 import org.xml.sax.SAXParseException; 35 import org.xml.sax.ext.LexicalHandler; 36 37 /** 38 * Adapts SAX API to the Expat native XML parser. Not intended for reuse 39 * across documents. 40 * 41 * @see org.apache.harmony.xml.ExpatPullParser 42 * @see org.apache.harmony.xml.ExpatReader 43 */ 44 class ExpatParser { 45 46 private static final int BUFFER_SIZE = 8096; // in bytes 47 48 /** Pointer to XML_Parser instance. */ 49 private int pointer; 50 51 private boolean inStartElement = false; 52 private int attributeCount = -1; 53 private int attributePointer = 0; 54 55 private final Locator locator = new ExpatLocator(); 56 57 private final ExpatReader xmlReader; 58 59 private final String publicId; 60 private final String systemId; 61 62 private final String encoding; 63 64 private final ExpatAttributes attributes = new CurrentAttributes(); 65 66 private static final String OUTSIDE_START_ELEMENT 67 = "Attributes can only be used within the scope of startElement()."; 68 69 /** We default to UTF-8 when the user doesn't specify an encoding. */ 70 private static final String DEFAULT_ENCODING = "UTF-8"; 71 72 /** Encoding used for Java chars, used to parse Readers and Strings */ 73 /*package*/ static final String CHARACTER_ENCODING = "UTF-16"; 74 75 /** Timeout for HTTP connections (in ms) */ 76 private static final int TIMEOUT = 20 * 1000; 77 78 /** 79 * Constructs a new parser with the specified encoding. 80 */ 81 /*package*/ ExpatParser(String encoding, ExpatReader xmlReader, 82 boolean processNamespaces, String publicId, String systemId) { 83 this.publicId = publicId; 84 this.systemId = systemId; 85 86 this.xmlReader = xmlReader; 87 88 /* 89 * TODO: Let Expat try to guess the encoding instead of defaulting. 90 * Unfortunately, I don't know how to tell which encoding Expat picked, 91 * so I won't know how to encode "<externalEntity>" below. The solution 92 * I think is to fix Expat to not require the "<externalEntity>" 93 * workaround. 94 */ 95 this.encoding = encoding == null ? DEFAULT_ENCODING : encoding; 96 this.pointer = initialize( 97 this.encoding, 98 processNamespaces 99 ); 100 } 101 102 /** 103 * Used by {@link EntityParser}. 104 */ 105 private ExpatParser(String encoding, ExpatReader xmlReader, int pointer, 106 String publicId, String systemId) { 107 this.encoding = encoding; 108 this.xmlReader = xmlReader; 109 this.pointer = pointer; 110 this.systemId = systemId; 111 this.publicId = publicId; 112 } 113 114 /** 115 * Initializes native resources. 116 * 117 * @return the pointer to the native parser 118 */ 119 private native int initialize(String encoding, boolean namespacesEnabled); 120 121 /** 122 * Called at the start of an element. 123 * 124 * @param uri namespace URI of element or "" if namespace processing is 125 * disabled 126 * @param localName local name of element or "" if namespace processing is 127 * disabled 128 * @param qName qualified name or "" if namespace processing is enabled 129 * @param attributePointer pointer to native attribute char*--we keep 130 * a separate pointer so we can detach it from the parser instance 131 * @param attributeCount number of attributes 132 */ 133 /*package*/ void startElement(String uri, String localName, String qName, 134 int attributePointer, int attributeCount) throws SAXException { 135 ContentHandler contentHandler = xmlReader.contentHandler; 136 if (contentHandler == null) { 137 return; 138 } 139 140 try { 141 inStartElement = true; 142 this.attributePointer = attributePointer; 143 this.attributeCount = attributeCount; 144 145 contentHandler.startElement( 146 uri, localName, qName, this.attributes); 147 } finally { 148 inStartElement = false; 149 this.attributeCount = -1; 150 this.attributePointer = 0; 151 } 152 } 153 154 /*package*/ void endElement(String uri, String localName, String qName) 155 throws SAXException { 156 ContentHandler contentHandler = xmlReader.contentHandler; 157 if (contentHandler != null) { 158 contentHandler.endElement(uri, localName, qName); 159 } 160 } 161 162 /*package*/ void text(char[] text, int length) throws SAXException { 163 ContentHandler contentHandler = xmlReader.contentHandler; 164 if (contentHandler != null) { 165 contentHandler.characters(text, 0, length); 166 } 167 } 168 169 /*package*/ void comment(char[] text, int length) throws SAXException { 170 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 171 if (lexicalHandler != null) { 172 lexicalHandler.comment(text, 0, length); 173 } 174 } 175 176 /*package*/ void startCdata() throws SAXException { 177 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 178 if (lexicalHandler != null) { 179 lexicalHandler.startCDATA(); 180 } 181 } 182 183 /*package*/ void endCdata() throws SAXException { 184 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 185 if (lexicalHandler != null) { 186 lexicalHandler.endCDATA(); 187 } 188 } 189 190 /*package*/ void startNamespace(String prefix, String uri) 191 throws SAXException { 192 ContentHandler contentHandler = xmlReader.contentHandler; 193 if (contentHandler != null) { 194 contentHandler.startPrefixMapping(prefix, uri); 195 } 196 } 197 198 /*package*/ void endNamespace(String prefix) throws SAXException { 199 ContentHandler contentHandler = xmlReader.contentHandler; 200 if (contentHandler != null) { 201 contentHandler.endPrefixMapping(prefix); 202 } 203 } 204 205 /*package*/ void startDtd(String name, String publicId, String systemId) 206 throws SAXException { 207 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 208 if (lexicalHandler != null) { 209 lexicalHandler.startDTD(name, publicId, systemId); 210 } 211 } 212 213 /*package*/ void endDtd() throws SAXException { 214 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 215 if (lexicalHandler != null) { 216 lexicalHandler.endDTD(); 217 } 218 } 219 220 /*package*/ void processingInstruction(String target, String data) 221 throws SAXException { 222 ContentHandler contentHandler = xmlReader.contentHandler; 223 if (contentHandler != null) { 224 contentHandler.processingInstruction(target, data); 225 } 226 } 227 228 /*package*/ void notationDecl(String name, String publicId, String systemId) throws SAXException { 229 DTDHandler dtdHandler = xmlReader.dtdHandler; 230 if (dtdHandler != null) { 231 dtdHandler.notationDecl(name, publicId, systemId); 232 } 233 } 234 235 /*package*/ void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { 236 DTDHandler dtdHandler = xmlReader.dtdHandler; 237 if (dtdHandler != null) { 238 dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName); 239 } 240 } 241 242 /** 243 * Handles an external entity. 244 * 245 * @param context to be passed back to Expat when we parse the entity 246 * @param publicId the publicId of the entity 247 * @param systemId the systemId of the entity 248 */ 249 /*package*/ void handleExternalEntity(String context, String publicId, 250 String systemId) throws SAXException, IOException { 251 EntityResolver entityResolver = xmlReader.entityResolver; 252 if (entityResolver == null) { 253 return; 254 } 255 256 /* 257 * The spec. is terribly under-specified here. It says that if the 258 * systemId is a URL, we should try to resolve it, but it doesn't 259 * specify how to tell whether or not the systemId is a URL let alone 260 * how to resolve it. 261 * 262 * Other implementations do various insane things. We try to keep it 263 * simple: if the systemId parses as a URI and it's relative, we try to 264 * resolve it against the parent document's systemId. If anything goes 265 * wrong, we go with the original systemId. If crazybob had designed 266 * the API, he would have left all resolving to the EntityResolver. 267 */ 268 if (this.systemId != null) { 269 try { 270 URI systemUri = new URI(systemId); 271 if (!systemUri.isAbsolute() && !systemUri.isOpaque()) { 272 // It could be relative (or it may not be a URI at all!) 273 URI baseUri = new URI(this.systemId); 274 systemUri = baseUri.resolve(systemUri); 275 276 // Replace systemId w/ resolved URI 277 systemId = systemUri.toString(); 278 } 279 } catch (Exception e) { 280 Logger.getLogger(ExpatParser.class.getName()).log(Level.INFO, 281 "Could not resolve '" + systemId + "' relative to" 282 + " '" + this.systemId + "' at " + locator, e); 283 } 284 } 285 286 InputSource inputSource = entityResolver.resolveEntity( 287 publicId, systemId); 288 if (inputSource == null) { 289 /* 290 * The spec. actually says that we should try to treat systemId 291 * as a URL and download and parse its contents here, but an 292 * entity resolver can easily accomplish the same by returning 293 * new InputSource(systemId). 294 * 295 * Downloading external entities by default would result in several 296 * unwanted DTD downloads, not to mention pose a security risk 297 * when parsing untrusted XML -- see for example 298 * http://archive.cert.uni-stuttgart.de/bugtraq/2002/10/msg00421.html -- 299 * so we just do nothing instead. This also enables the user to 300 * opt out of entity parsing when using 301 * {@link org.xml.sax.helpers.DefaultHandler}, something that 302 * wouldn't be possible otherwise. 303 */ 304 return; 305 } 306 307 String encoding = pickEncoding(inputSource); 308 int pointer = createEntityParser(this.pointer, context); 309 try { 310 EntityParser entityParser = new EntityParser(encoding, xmlReader, 311 pointer, inputSource.getPublicId(), 312 inputSource.getSystemId()); 313 314 parseExternalEntity(entityParser, inputSource); 315 } finally { 316 releaseParser(pointer); 317 } 318 } 319 320 /** 321 * Picks an encoding for an external entity. Defaults to UTF-8. 322 */ 323 private String pickEncoding(InputSource inputSource) { 324 Reader reader = inputSource.getCharacterStream(); 325 if (reader != null) { 326 return CHARACTER_ENCODING; 327 } 328 329 String encoding = inputSource.getEncoding(); 330 return encoding == null ? DEFAULT_ENCODING : encoding; 331 } 332 333 /** 334 * Parses the the external entity provided by the input source. 335 */ 336 private void parseExternalEntity(ExpatParser entityParser, 337 InputSource inputSource) throws IOException, SAXException { 338 /* 339 * Expat complains if the external entity isn't wrapped with a root 340 * element so we add one and ignore it later on during parsing. 341 */ 342 343 // Try the character stream. 344 Reader reader = inputSource.getCharacterStream(); 345 if (reader != null) { 346 try { 347 entityParser.append("<externalEntity>"); 348 entityParser.parseFragment(reader); 349 entityParser.append("</externalEntity>"); 350 } finally { 351 // TODO: Don't eat original exception when close() throws. 352 reader.close(); 353 } 354 return; 355 } 356 357 // Try the byte stream. 358 InputStream in = inputSource.getByteStream(); 359 if (in != null) { 360 try { 361 entityParser.append("<externalEntity>" 362 .getBytes(entityParser.encoding)); 363 entityParser.parseFragment(in); 364 entityParser.append("</externalEntity>" 365 .getBytes(entityParser.encoding)); 366 } finally { 367 // TODO: Don't eat original exception when close() throws. 368 in.close(); 369 } 370 return; 371 } 372 373 // Make sure we use the user-provided systemId. 374 String systemId = inputSource.getSystemId(); 375 if (systemId == null) { 376 // TODO: We could just try our systemId here. 377 throw new ParseException("No input specified.", locator); 378 } 379 380 // Try the system id. 381 in = openUrl(systemId); 382 try { 383 entityParser.append("<externalEntity>" 384 .getBytes(entityParser.encoding)); 385 entityParser.parseFragment(in); 386 entityParser.append("</externalEntity>" 387 .getBytes(entityParser.encoding)); 388 } finally { 389 in.close(); 390 } 391 } 392 393 /** 394 * Creates a native entity parser. 395 * 396 * @param parentPointer pointer to parent Expat parser 397 * @param context passed to {@link #handleExternalEntity} 398 * @return pointer to native parser 399 */ 400 private static native int createEntityParser(int parentPointer, String context); 401 402 /** 403 * Appends part of an XML document. This parser will parse the given XML to 404 * the extent possible and dispatch to the appropriate methods. 405 * 406 * @param xml a whole or partial snippet of XML 407 * @throws SAXException if an error occurs during parsing 408 */ 409 /*package*/ void append(String xml) throws SAXException { 410 try { 411 appendString(this.pointer, xml, false); 412 } catch (ExpatException e) { 413 throw new ParseException(e.getMessage(), this.locator); 414 } 415 } 416 417 private native void appendString(int pointer, String xml, boolean isFinal) 418 throws SAXException, ExpatException; 419 420 /** 421 * Appends part of an XML document. This parser will parse the given XML to 422 * the extent possible and dispatch to the appropriate methods. 423 * 424 * @param xml a whole or partial snippet of XML 425 * @param offset into the char[] 426 * @param length of characters to use 427 * @throws SAXException if an error occurs during parsing 428 */ 429 /*package*/ void append(char[] xml, int offset, int length) 430 throws SAXException { 431 try { 432 appendChars(this.pointer, xml, offset, length); 433 } catch (ExpatException e) { 434 throw new ParseException(e.getMessage(), this.locator); 435 } 436 } 437 438 private native void appendChars(int pointer, char[] xml, int offset, 439 int length) throws SAXException, ExpatException; 440 441 /** 442 * Appends part of an XML document. This parser will parse the given XML to 443 * the extent possible and dispatch to the appropriate methods. 444 * 445 * @param xml a whole or partial snippet of XML 446 * @throws SAXException if an error occurs during parsing 447 */ 448 /*package*/ void append(byte[] xml) throws SAXException { 449 append(xml, 0, xml.length); 450 } 451 452 /** 453 * Appends part of an XML document. This parser will parse the given XML to 454 * the extent possible and dispatch to the appropriate methods. 455 * 456 * @param xml a whole or partial snippet of XML 457 * @param offset into the byte[] 458 * @param length of bytes to use 459 * @throws SAXException if an error occurs during parsing 460 */ 461 /*package*/ void append(byte[] xml, int offset, int length) 462 throws SAXException { 463 try { 464 appendBytes(this.pointer, xml, offset, length); 465 } catch (ExpatException e) { 466 throw new ParseException(e.getMessage(), this.locator); 467 } 468 } 469 470 private native void appendBytes(int pointer, byte[] xml, int offset, 471 int length) throws SAXException, ExpatException; 472 473 /** 474 * Parses an XML document from the given input stream. 475 */ 476 /*package*/ void parseDocument(InputStream in) throws IOException, 477 SAXException { 478 startDocument(); 479 parseFragment(in); 480 finish(); 481 endDocument(); 482 } 483 484 /** 485 * Parses an XML Document from the given reader. 486 */ 487 /*package*/ void parseDocument(Reader in) throws IOException, SAXException { 488 startDocument(); 489 parseFragment(in); 490 finish(); 491 endDocument(); 492 } 493 494 /** 495 * Parses XML from the given Reader. 496 */ 497 private void parseFragment(Reader in) throws IOException, SAXException { 498 char[] buffer = new char[BUFFER_SIZE / 2]; 499 int length; 500 while ((length = in.read(buffer)) != -1) { 501 try { 502 appendChars(this.pointer, buffer, 0, length); 503 } catch (ExpatException e) { 504 throw new ParseException(e.getMessage(), locator); 505 } 506 } 507 } 508 509 /** 510 * Parses XML from the given input stream. 511 */ 512 private void parseFragment(InputStream in) 513 throws IOException, SAXException { 514 byte[] buffer = new byte[BUFFER_SIZE]; 515 int length; 516 while ((length = in.read(buffer)) != -1) { 517 try { 518 appendBytes(this.pointer, buffer, 0, length); 519 } catch (ExpatException e) { 520 throw new ParseException(e.getMessage(), this.locator); 521 } 522 } 523 } 524 525 private void startDocument() throws SAXException { 526 ContentHandler contentHandler = xmlReader.contentHandler; 527 if (contentHandler != null) { 528 contentHandler.setDocumentLocator(this.locator); 529 contentHandler.startDocument(); 530 } 531 } 532 533 private void endDocument() throws SAXException { 534 ContentHandler contentHandler; 535 contentHandler = xmlReader.contentHandler; 536 if (contentHandler != null) { 537 contentHandler.endDocument(); 538 } 539 } 540 541 /** 542 * Indicate that we're finished parsing. 543 * 544 * @throws SAXException if the xml is incomplete 545 */ 546 /*package*/ void finish() throws SAXException { 547 try { 548 appendString(this.pointer, "", true); 549 } catch (ExpatException e) { 550 throw new ParseException(e.getMessage(), this.locator); 551 } 552 } 553 554 @Override protected synchronized void finalize() throws Throwable { 555 try { 556 if (this.pointer != 0) { 557 release(this.pointer); 558 this.pointer = 0; 559 } 560 } finally { 561 super.finalize(); 562 } 563 } 564 565 /** 566 * Releases all native objects. 567 */ 568 private native void release(int pointer); 569 570 /** 571 * Releases native parser only. 572 */ 573 private static native void releaseParser(int pointer); 574 575 /** 576 * Initialize static resources. 577 */ 578 private static native void staticInitialize(String emptyString); 579 580 static { 581 staticInitialize(""); 582 } 583 584 /** 585 * Gets the current line number within the XML file. 586 */ 587 private int line() { 588 return line(this.pointer); 589 } 590 591 private static native int line(int pointer); 592 593 /** 594 * Gets the current column number within the XML file. 595 */ 596 private int column() { 597 return column(this.pointer); 598 } 599 600 private static native int column(int pointer); 601 602 /** 603 * Clones the current attributes so they can be used outside of 604 * startElement(). 605 */ 606 /*package*/ Attributes cloneAttributes() { 607 if (!inStartElement) { 608 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 609 } 610 611 if (attributeCount == 0) { 612 return ClonedAttributes.EMPTY; 613 } 614 615 int clonePointer 616 = cloneAttributes(this.attributePointer, this.attributeCount); 617 return new ClonedAttributes(pointer, clonePointer, attributeCount); 618 } 619 620 private static native int cloneAttributes(int pointer, int attributeCount); 621 622 /** 623 * Used for cloned attributes. 624 */ 625 private static class ClonedAttributes extends ExpatAttributes { 626 627 private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0); 628 629 private final int parserPointer; 630 private int pointer; 631 private final int length; 632 633 /** 634 * Constructs a Java wrapper for native attributes. 635 * 636 * @param parserPointer pointer to the parse, can be 0 if length is 0. 637 * @param pointer pointer to the attributes array, can be 0 if the 638 * length is 0. 639 * @param length number of attributes 640 */ 641 private ClonedAttributes(int parserPointer, int pointer, int length) { 642 this.parserPointer = parserPointer; 643 this.pointer = pointer; 644 this.length = length; 645 } 646 647 @Override 648 public int getParserPointer() { 649 return this.parserPointer; 650 } 651 652 @Override 653 public int getPointer() { 654 return pointer; 655 } 656 657 @Override 658 public int getLength() { 659 return length; 660 } 661 662 @Override protected synchronized void finalize() throws Throwable { 663 try { 664 if (pointer != 0) { 665 freeAttributes(pointer); 666 pointer = 0; 667 } 668 } finally { 669 super.finalize(); 670 } 671 } 672 } 673 674 private class ExpatLocator implements Locator { 675 676 public String getPublicId() { 677 return publicId; 678 } 679 680 public String getSystemId() { 681 return systemId; 682 } 683 684 public int getLineNumber() { 685 return line(); 686 } 687 688 public int getColumnNumber() { 689 return column(); 690 } 691 692 @Override 693 public String toString() { 694 return "Locator[publicId: " + publicId + ", systemId: " + systemId 695 + ", line: " + getLineNumber() 696 + ", column: " + getColumnNumber() + "]"; 697 } 698 } 699 700 /** 701 * Attributes that are only valid during startElement(). 702 */ 703 private class CurrentAttributes extends ExpatAttributes { 704 705 @Override 706 public int getParserPointer() { 707 return pointer; 708 } 709 710 @Override 711 public int getPointer() { 712 if (!inStartElement) { 713 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 714 } 715 return attributePointer; 716 } 717 718 @Override 719 public int getLength() { 720 if (!inStartElement) { 721 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 722 } 723 return attributeCount; 724 } 725 } 726 727 /** 728 * Includes line and column in the message. 729 */ 730 private static class ParseException extends SAXParseException { 731 732 private ParseException(String message, Locator locator) { 733 super(makeMessage(message, locator), locator); 734 } 735 736 private static String makeMessage(String message, Locator locator) { 737 return makeMessage(message, locator.getLineNumber(), 738 locator.getColumnNumber()); 739 } 740 741 private static String makeMessage( 742 String message, int line, int column) { 743 return "At line " + line + ", column " 744 + column + ": " + message; 745 } 746 } 747 748 /** 749 * Opens an InputStream for the given URL. 750 */ 751 /*package*/ static InputStream openUrl(String url) throws IOException { 752 try { 753 URLConnection urlConnection = new URL(url).openConnection(); 754 urlConnection.setConnectTimeout(TIMEOUT); 755 urlConnection.setReadTimeout(TIMEOUT); 756 urlConnection.setDoInput(true); 757 urlConnection.setDoOutput(false); 758 return urlConnection.getInputStream(); 759 } catch (Exception e) { 760 IOException ioe = new IOException("Couldn't open " + url); 761 ioe.initCause(e); 762 throw ioe; 763 } 764 } 765 766 /** 767 * Parses an external entity. 768 */ 769 private static class EntityParser extends ExpatParser { 770 771 private int depth = 0; 772 773 private EntityParser(String encoding, ExpatReader xmlReader, 774 int pointer, String publicId, String systemId) { 775 super(encoding, xmlReader, pointer, publicId, systemId); 776 } 777 778 @Override 779 void startElement(String uri, String localName, String qName, 780 int attributePointer, int attributeCount) throws SAXException { 781 /* 782 * Skip topmost element generated by our workaround in 783 * {@link #handleExternalEntity}. 784 */ 785 if (depth++ > 0) { 786 super.startElement(uri, localName, qName, attributePointer, 787 attributeCount); 788 } 789 } 790 791 @Override 792 void endElement(String uri, String localName, String qName) 793 throws SAXException { 794 if (--depth > 0) { 795 super.endElement(uri, localName, qName); 796 } 797 } 798 799 @Override 800 @SuppressWarnings("FinalizeDoesntCallSuperFinalize") 801 protected synchronized void finalize() throws Throwable { 802 /* 803 * Don't release our native resources. We do so explicitly in 804 * {@link #handleExternalEntity} and we don't want to release the 805 * parsing context--our parent is using it. 806 */ 807 } 808 } 809 } 810