1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.harmony.xml; 18 19 import java.io.IOException; 20 import java.io.InputStream; 21 import java.io.Reader; 22 import java.net.URI; 23 import java.net.URL; 24 import java.net.URLConnection; 25 import libcore.io.IoUtils; 26 import org.xml.sax.Attributes; 27 import org.xml.sax.ContentHandler; 28 import org.xml.sax.DTDHandler; 29 import org.xml.sax.EntityResolver; 30 import org.xml.sax.InputSource; 31 import org.xml.sax.Locator; 32 import org.xml.sax.SAXException; 33 import org.xml.sax.SAXParseException; 34 import org.xml.sax.ext.LexicalHandler; 35 36 /** 37 * Adapts SAX API to the Expat native XML parser. Not intended for reuse 38 * across documents. 39 * 40 * @see org.apache.harmony.xml.ExpatReader 41 */ 42 class ExpatParser { 43 44 private static final int BUFFER_SIZE = 8096; // in bytes 45 46 /** Pointer to XML_Parser instance. */ 47 private int pointer; 48 49 private boolean inStartElement = false; 50 private int attributeCount = -1; 51 private int attributePointer = 0; 52 53 private final Locator locator = new ExpatLocator(); 54 55 private final ExpatReader xmlReader; 56 57 private final String publicId; 58 private final String systemId; 59 60 private final String encoding; 61 62 private final ExpatAttributes attributes = new CurrentAttributes(); 63 64 private static final String OUTSIDE_START_ELEMENT 65 = "Attributes can only be used within the scope of startElement()."; 66 67 /** We default to UTF-8 when the user doesn't specify an encoding. */ 68 private static final String DEFAULT_ENCODING = "UTF-8"; 69 70 /** Encoding used for Java chars, used to parse Readers and Strings */ 71 /*package*/ static final String CHARACTER_ENCODING = "UTF-16"; 72 73 /** Timeout for HTTP connections (in ms) */ 74 private static final int TIMEOUT = 20 * 1000; 75 76 /** 77 * Constructs a new parser with the specified encoding. 78 */ 79 /*package*/ ExpatParser(String encoding, ExpatReader xmlReader, 80 boolean processNamespaces, String publicId, String systemId) { 81 this.publicId = publicId; 82 this.systemId = systemId; 83 84 this.xmlReader = xmlReader; 85 86 /* 87 * TODO: Let Expat try to guess the encoding instead of defaulting. 88 * Unfortunately, I don't know how to tell which encoding Expat picked, 89 * so I won't know how to encode "<externalEntity>" below. The solution 90 * I think is to fix Expat to not require the "<externalEntity>" 91 * workaround. 92 */ 93 this.encoding = encoding == null ? DEFAULT_ENCODING : encoding; 94 this.pointer = initialize( 95 this.encoding, 96 processNamespaces 97 ); 98 } 99 100 /** 101 * Used by {@link EntityParser}. 102 */ 103 private ExpatParser(String encoding, ExpatReader xmlReader, int pointer, 104 String publicId, String systemId) { 105 this.encoding = encoding; 106 this.xmlReader = xmlReader; 107 this.pointer = pointer; 108 this.systemId = systemId; 109 this.publicId = publicId; 110 } 111 112 /** 113 * Initializes native resources. 114 * 115 * @return the pointer to the native parser 116 */ 117 private native int initialize(String encoding, boolean namespacesEnabled); 118 119 /** 120 * Called at the start of an element. 121 * 122 * @param uri namespace URI of element or "" if namespace processing is 123 * disabled 124 * @param localName local name of element or "" if namespace processing is 125 * disabled 126 * @param qName qualified name or "" if namespace processing is enabled 127 * @param attributePointer pointer to native attribute char*--we keep 128 * a separate pointer so we can detach it from the parser instance 129 * @param attributeCount number of attributes 130 */ 131 /*package*/ void startElement(String uri, String localName, String qName, 132 int attributePointer, int attributeCount) throws SAXException { 133 ContentHandler contentHandler = xmlReader.contentHandler; 134 if (contentHandler == null) { 135 return; 136 } 137 138 try { 139 inStartElement = true; 140 this.attributePointer = attributePointer; 141 this.attributeCount = attributeCount; 142 143 contentHandler.startElement( 144 uri, localName, qName, this.attributes); 145 } finally { 146 inStartElement = false; 147 this.attributeCount = -1; 148 this.attributePointer = 0; 149 } 150 } 151 152 /*package*/ void endElement(String uri, String localName, String qName) 153 throws SAXException { 154 ContentHandler contentHandler = xmlReader.contentHandler; 155 if (contentHandler != null) { 156 contentHandler.endElement(uri, localName, qName); 157 } 158 } 159 160 /*package*/ void text(char[] text, int length) throws SAXException { 161 ContentHandler contentHandler = xmlReader.contentHandler; 162 if (contentHandler != null) { 163 contentHandler.characters(text, 0, length); 164 } 165 } 166 167 /*package*/ void comment(char[] text, int length) throws SAXException { 168 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 169 if (lexicalHandler != null) { 170 lexicalHandler.comment(text, 0, length); 171 } 172 } 173 174 /*package*/ void startCdata() throws SAXException { 175 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 176 if (lexicalHandler != null) { 177 lexicalHandler.startCDATA(); 178 } 179 } 180 181 /*package*/ void endCdata() throws SAXException { 182 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 183 if (lexicalHandler != null) { 184 lexicalHandler.endCDATA(); 185 } 186 } 187 188 /*package*/ void startNamespace(String prefix, String uri) 189 throws SAXException { 190 ContentHandler contentHandler = xmlReader.contentHandler; 191 if (contentHandler != null) { 192 contentHandler.startPrefixMapping(prefix, uri); 193 } 194 } 195 196 /*package*/ void endNamespace(String prefix) throws SAXException { 197 ContentHandler contentHandler = xmlReader.contentHandler; 198 if (contentHandler != null) { 199 contentHandler.endPrefixMapping(prefix); 200 } 201 } 202 203 /*package*/ void startDtd(String name, String publicId, String systemId) 204 throws SAXException { 205 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 206 if (lexicalHandler != null) { 207 lexicalHandler.startDTD(name, publicId, systemId); 208 } 209 } 210 211 /*package*/ void endDtd() throws SAXException { 212 LexicalHandler lexicalHandler = xmlReader.lexicalHandler; 213 if (lexicalHandler != null) { 214 lexicalHandler.endDTD(); 215 } 216 } 217 218 /*package*/ void processingInstruction(String target, String data) 219 throws SAXException { 220 ContentHandler contentHandler = xmlReader.contentHandler; 221 if (contentHandler != null) { 222 contentHandler.processingInstruction(target, data); 223 } 224 } 225 226 /*package*/ void notationDecl(String name, String publicId, String systemId) throws SAXException { 227 DTDHandler dtdHandler = xmlReader.dtdHandler; 228 if (dtdHandler != null) { 229 dtdHandler.notationDecl(name, publicId, systemId); 230 } 231 } 232 233 /*package*/ void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { 234 DTDHandler dtdHandler = xmlReader.dtdHandler; 235 if (dtdHandler != null) { 236 dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName); 237 } 238 } 239 240 /** 241 * Handles an external entity. 242 * 243 * @param context to be passed back to Expat when we parse the entity 244 * @param publicId the publicId of the entity 245 * @param systemId the systemId of the entity 246 */ 247 /*package*/ void handleExternalEntity(String context, String publicId, 248 String systemId) throws SAXException, IOException { 249 EntityResolver entityResolver = xmlReader.entityResolver; 250 if (entityResolver == null) { 251 return; 252 } 253 254 /* 255 * The spec. is terribly under-specified here. It says that if the 256 * systemId is a URL, we should try to resolve it, but it doesn't 257 * specify how to tell whether or not the systemId is a URL let alone 258 * how to resolve it. 259 * 260 * Other implementations do various insane things. We try to keep it 261 * simple: if the systemId parses as a URI and it's relative, we try to 262 * resolve it against the parent document's systemId. If anything goes 263 * wrong, we go with the original systemId. If crazybob had designed 264 * the API, he would have left all resolving to the EntityResolver. 265 */ 266 if (this.systemId != null) { 267 try { 268 URI systemUri = new URI(systemId); 269 if (!systemUri.isAbsolute() && !systemUri.isOpaque()) { 270 // It could be relative (or it may not be a URI at all!) 271 URI baseUri = new URI(this.systemId); 272 systemUri = baseUri.resolve(systemUri); 273 274 // Replace systemId w/ resolved URI 275 systemId = systemUri.toString(); 276 } 277 } catch (Exception e) { 278 System.logI("Could not resolve '" + systemId + "' relative to" 279 + " '" + this.systemId + "' at " + locator, e); 280 } 281 } 282 283 InputSource inputSource = entityResolver.resolveEntity( 284 publicId, systemId); 285 if (inputSource == null) { 286 /* 287 * The spec. actually says that we should try to treat systemId 288 * as a URL and download and parse its contents here, but an 289 * entity resolver can easily accomplish the same by returning 290 * new InputSource(systemId). 291 * 292 * Downloading external entities by default would result in several 293 * unwanted DTD downloads, not to mention pose a security risk 294 * when parsing untrusted XML -- see for example 295 * http://archive.cert.uni-stuttgart.de/bugtraq/2002/10/msg00421.html -- 296 * so we just do nothing instead. This also enables the user to 297 * opt out of entity parsing when using 298 * {@link org.xml.sax.helpers.DefaultHandler}, something that 299 * wouldn't be possible otherwise. 300 */ 301 return; 302 } 303 304 String encoding = pickEncoding(inputSource); 305 int pointer = createEntityParser(this.pointer, context); 306 try { 307 EntityParser entityParser = new EntityParser(encoding, xmlReader, 308 pointer, inputSource.getPublicId(), 309 inputSource.getSystemId()); 310 311 parseExternalEntity(entityParser, inputSource); 312 } finally { 313 releaseParser(pointer); 314 } 315 } 316 317 /** 318 * Picks an encoding for an external entity. Defaults to UTF-8. 319 */ 320 private String pickEncoding(InputSource inputSource) { 321 Reader reader = inputSource.getCharacterStream(); 322 if (reader != null) { 323 return CHARACTER_ENCODING; 324 } 325 326 String encoding = inputSource.getEncoding(); 327 return encoding == null ? DEFAULT_ENCODING : encoding; 328 } 329 330 /** 331 * Parses the the external entity provided by the input source. 332 */ 333 private void parseExternalEntity(ExpatParser entityParser, 334 InputSource inputSource) throws IOException, SAXException { 335 /* 336 * Expat complains if the external entity isn't wrapped with a root 337 * element so we add one and ignore it later on during parsing. 338 */ 339 340 // Try the character stream. 341 Reader reader = inputSource.getCharacterStream(); 342 if (reader != null) { 343 try { 344 entityParser.append("<externalEntity>"); 345 entityParser.parseFragment(reader); 346 entityParser.append("</externalEntity>"); 347 } finally { 348 IoUtils.closeQuietly(reader); 349 } 350 return; 351 } 352 353 // Try the byte stream. 354 InputStream in = inputSource.getByteStream(); 355 if (in != null) { 356 try { 357 entityParser.append("<externalEntity>" 358 .getBytes(entityParser.encoding)); 359 entityParser.parseFragment(in); 360 entityParser.append("</externalEntity>" 361 .getBytes(entityParser.encoding)); 362 } finally { 363 IoUtils.closeQuietly(in); 364 } 365 return; 366 } 367 368 // Make sure we use the user-provided systemId. 369 String systemId = inputSource.getSystemId(); 370 if (systemId == null) { 371 // TODO: We could just try our systemId here. 372 throw new ParseException("No input specified.", locator); 373 } 374 375 // Try the system id. 376 in = openUrl(systemId); 377 try { 378 entityParser.append("<externalEntity>" 379 .getBytes(entityParser.encoding)); 380 entityParser.parseFragment(in); 381 entityParser.append("</externalEntity>" 382 .getBytes(entityParser.encoding)); 383 } finally { 384 IoUtils.closeQuietly(in); 385 } 386 } 387 388 /** 389 * Creates a native entity parser. 390 * 391 * @param parentPointer pointer to parent Expat parser 392 * @param context passed to {@link #handleExternalEntity} 393 * @return pointer to native parser 394 */ 395 private static native int createEntityParser(int parentPointer, String context); 396 397 /** 398 * Appends part of an XML document. This parser will parse the given XML to 399 * the extent possible and dispatch to the appropriate methods. 400 * 401 * @param xml a whole or partial snippet of XML 402 * @throws SAXException if an error occurs during parsing 403 */ 404 /*package*/ void append(String xml) throws SAXException { 405 try { 406 appendString(this.pointer, xml, false); 407 } catch (ExpatException e) { 408 throw new ParseException(e.getMessage(), this.locator); 409 } 410 } 411 412 private native void appendString(int pointer, String xml, boolean isFinal) 413 throws SAXException, ExpatException; 414 415 /** 416 * Appends part of an XML document. This parser will parse the given XML to 417 * the extent possible and dispatch to the appropriate methods. 418 * 419 * @param xml a whole or partial snippet of XML 420 * @param offset into the char[] 421 * @param length of characters to use 422 * @throws SAXException if an error occurs during parsing 423 */ 424 /*package*/ void append(char[] xml, int offset, int length) 425 throws SAXException { 426 try { 427 appendChars(this.pointer, xml, offset, length); 428 } catch (ExpatException e) { 429 throw new ParseException(e.getMessage(), this.locator); 430 } 431 } 432 433 private native void appendChars(int pointer, char[] xml, int offset, 434 int length) throws SAXException, ExpatException; 435 436 /** 437 * Appends part of an XML document. This parser will parse the given XML to 438 * the extent possible and dispatch to the appropriate methods. 439 * 440 * @param xml a whole or partial snippet of XML 441 * @throws SAXException if an error occurs during parsing 442 */ 443 /*package*/ void append(byte[] xml) throws SAXException { 444 append(xml, 0, xml.length); 445 } 446 447 /** 448 * Appends part of an XML document. This parser will parse the given XML to 449 * the extent possible and dispatch to the appropriate methods. 450 * 451 * @param xml a whole or partial snippet of XML 452 * @param offset into the byte[] 453 * @param length of bytes to use 454 * @throws SAXException if an error occurs during parsing 455 */ 456 /*package*/ void append(byte[] xml, int offset, int length) 457 throws SAXException { 458 try { 459 appendBytes(this.pointer, xml, offset, length); 460 } catch (ExpatException e) { 461 throw new ParseException(e.getMessage(), this.locator); 462 } 463 } 464 465 private native void appendBytes(int pointer, byte[] xml, int offset, 466 int length) throws SAXException, ExpatException; 467 468 /** 469 * Parses an XML document from the given input stream. 470 */ 471 /*package*/ void parseDocument(InputStream in) throws IOException, 472 SAXException { 473 startDocument(); 474 parseFragment(in); 475 finish(); 476 endDocument(); 477 } 478 479 /** 480 * Parses an XML Document from the given reader. 481 */ 482 /*package*/ void parseDocument(Reader in) throws IOException, SAXException { 483 startDocument(); 484 parseFragment(in); 485 finish(); 486 endDocument(); 487 } 488 489 /** 490 * Parses XML from the given Reader. 491 */ 492 private void parseFragment(Reader in) throws IOException, SAXException { 493 char[] buffer = new char[BUFFER_SIZE / 2]; 494 int length; 495 while ((length = in.read(buffer)) != -1) { 496 try { 497 appendChars(this.pointer, buffer, 0, length); 498 } catch (ExpatException e) { 499 throw new ParseException(e.getMessage(), locator); 500 } 501 } 502 } 503 504 /** 505 * Parses XML from the given input stream. 506 */ 507 private void parseFragment(InputStream in) 508 throws IOException, SAXException { 509 byte[] buffer = new byte[BUFFER_SIZE]; 510 int length; 511 while ((length = in.read(buffer)) != -1) { 512 try { 513 appendBytes(this.pointer, buffer, 0, length); 514 } catch (ExpatException e) { 515 throw new ParseException(e.getMessage(), this.locator); 516 } 517 } 518 } 519 520 private void startDocument() throws SAXException { 521 ContentHandler contentHandler = xmlReader.contentHandler; 522 if (contentHandler != null) { 523 contentHandler.setDocumentLocator(this.locator); 524 contentHandler.startDocument(); 525 } 526 } 527 528 private void endDocument() throws SAXException { 529 ContentHandler contentHandler; 530 contentHandler = xmlReader.contentHandler; 531 if (contentHandler != null) { 532 contentHandler.endDocument(); 533 } 534 } 535 536 /** 537 * Indicate that we're finished parsing. 538 * 539 * @throws SAXException if the xml is incomplete 540 */ 541 /*package*/ void finish() throws SAXException { 542 try { 543 appendString(this.pointer, "", true); 544 } catch (ExpatException e) { 545 throw new ParseException(e.getMessage(), this.locator); 546 } 547 } 548 549 @Override protected synchronized void finalize() throws Throwable { 550 try { 551 if (this.pointer != 0) { 552 release(this.pointer); 553 this.pointer = 0; 554 } 555 } finally { 556 super.finalize(); 557 } 558 } 559 560 /** 561 * Releases all native objects. 562 */ 563 private native void release(int pointer); 564 565 /** 566 * Releases native parser only. 567 */ 568 private static native void releaseParser(int pointer); 569 570 /** 571 * Initialize static resources. 572 */ 573 private static native void staticInitialize(String emptyString); 574 575 static { 576 staticInitialize(""); 577 } 578 579 /** 580 * Gets the current line number within the XML file. 581 */ 582 private int line() { 583 return line(this.pointer); 584 } 585 586 private static native int line(int pointer); 587 588 /** 589 * Gets the current column number within the XML file. 590 */ 591 private int column() { 592 return column(this.pointer); 593 } 594 595 private static native int column(int pointer); 596 597 /** 598 * Clones the current attributes so they can be used outside of 599 * startElement(). 600 */ 601 /*package*/ Attributes cloneAttributes() { 602 if (!inStartElement) { 603 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 604 } 605 606 if (attributeCount == 0) { 607 return ClonedAttributes.EMPTY; 608 } 609 610 int clonePointer 611 = cloneAttributes(this.attributePointer, this.attributeCount); 612 return new ClonedAttributes(pointer, clonePointer, attributeCount); 613 } 614 615 private static native int cloneAttributes(int pointer, int attributeCount); 616 617 /** 618 * Used for cloned attributes. 619 */ 620 private static class ClonedAttributes extends ExpatAttributes { 621 622 private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0); 623 624 private final int parserPointer; 625 private int pointer; 626 private final int length; 627 628 /** 629 * Constructs a Java wrapper for native attributes. 630 * 631 * @param parserPointer pointer to the parse, can be 0 if length is 0. 632 * @param pointer pointer to the attributes array, can be 0 if the 633 * length is 0. 634 * @param length number of attributes 635 */ 636 private ClonedAttributes(int parserPointer, int pointer, int length) { 637 this.parserPointer = parserPointer; 638 this.pointer = pointer; 639 this.length = length; 640 } 641 642 @Override 643 public int getParserPointer() { 644 return this.parserPointer; 645 } 646 647 @Override 648 public int getPointer() { 649 return pointer; 650 } 651 652 @Override 653 public int getLength() { 654 return length; 655 } 656 657 @Override protected synchronized void finalize() throws Throwable { 658 try { 659 if (pointer != 0) { 660 freeAttributes(pointer); 661 pointer = 0; 662 } 663 } finally { 664 super.finalize(); 665 } 666 } 667 } 668 669 private class ExpatLocator implements Locator { 670 671 public String getPublicId() { 672 return publicId; 673 } 674 675 public String getSystemId() { 676 return systemId; 677 } 678 679 public int getLineNumber() { 680 return line(); 681 } 682 683 public int getColumnNumber() { 684 return column(); 685 } 686 687 @Override 688 public String toString() { 689 return "Locator[publicId: " + publicId + ", systemId: " + systemId 690 + ", line: " + getLineNumber() 691 + ", column: " + getColumnNumber() + "]"; 692 } 693 } 694 695 /** 696 * Attributes that are only valid during startElement(). 697 */ 698 private class CurrentAttributes extends ExpatAttributes { 699 700 @Override 701 public int getParserPointer() { 702 return pointer; 703 } 704 705 @Override 706 public int getPointer() { 707 if (!inStartElement) { 708 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 709 } 710 return attributePointer; 711 } 712 713 @Override 714 public int getLength() { 715 if (!inStartElement) { 716 throw new IllegalStateException(OUTSIDE_START_ELEMENT); 717 } 718 return attributeCount; 719 } 720 } 721 722 /** 723 * Includes line and column in the message. 724 */ 725 private static class ParseException extends SAXParseException { 726 727 private ParseException(String message, Locator locator) { 728 super(makeMessage(message, locator), locator); 729 } 730 731 private static String makeMessage(String message, Locator locator) { 732 return makeMessage(message, locator.getLineNumber(), 733 locator.getColumnNumber()); 734 } 735 736 private static String makeMessage( 737 String message, int line, int column) { 738 return "At line " + line + ", column " 739 + column + ": " + message; 740 } 741 } 742 743 /** 744 * Opens an InputStream for the given URL. 745 */ 746 /*package*/ static InputStream openUrl(String url) throws IOException { 747 try { 748 URLConnection urlConnection = new URL(url).openConnection(); 749 urlConnection.setConnectTimeout(TIMEOUT); 750 urlConnection.setReadTimeout(TIMEOUT); 751 urlConnection.setDoInput(true); 752 urlConnection.setDoOutput(false); 753 return urlConnection.getInputStream(); 754 } catch (Exception e) { 755 IOException ioe = new IOException("Couldn't open " + url); 756 ioe.initCause(e); 757 throw ioe; 758 } 759 } 760 761 /** 762 * Parses an external entity. 763 */ 764 private static class EntityParser extends ExpatParser { 765 766 private int depth = 0; 767 768 private EntityParser(String encoding, ExpatReader xmlReader, 769 int pointer, String publicId, String systemId) { 770 super(encoding, xmlReader, pointer, publicId, systemId); 771 } 772 773 @Override 774 void startElement(String uri, String localName, String qName, 775 int attributePointer, int attributeCount) throws SAXException { 776 /* 777 * Skip topmost element generated by our workaround in 778 * {@link #handleExternalEntity}. 779 */ 780 if (depth++ > 0) { 781 super.startElement(uri, localName, qName, attributePointer, 782 attributeCount); 783 } 784 } 785 786 @Override 787 void endElement(String uri, String localName, String qName) 788 throws SAXException { 789 if (--depth > 0) { 790 super.endElement(uri, localName, qName); 791 } 792 } 793 794 @Override 795 @SuppressWarnings("FinalizeDoesntCallSuperFinalize") 796 protected synchronized void finalize() throws Throwable { 797 /* 798 * Don't release our native resources. We do so explicitly in 799 * {@link #handleExternalEntity} and we don't want to release the 800 * parsing context--our parent is using it. 801 */ 802 } 803 } 804 } 805