1 /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. */ 20 21 // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode) 22 23 package org.kxml2.io; 24 25 import java.io.Closeable; 26 import java.io.IOException; 27 import java.io.InputStream; 28 import java.io.InputStreamReader; 29 import java.io.Reader; 30 import java.util.HashMap; 31 import java.util.Map; 32 import libcore.internal.StringPool; 33 import org.xmlpull.v1.XmlPullParser; 34 import org.xmlpull.v1.XmlPullParserException; 35 36 /** 37 * An XML pull parser with limited support for parsing internal DTDs. 
*/
public class KXmlParser implements XmlPullParser, Closeable {

    // URI strings naming the xmlpull properties and the "relaxed" feature.
    private static final String PROPERTY_XMLDECL_VERSION
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    private static final String PROPERTY_XMLDECL_STANDALONE
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
    private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";

    // Entity names mapped to their replacement text: lt, gt, amp, apos and quot.
    private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
    static {
        DEFAULT_ENTITIES.put("lt", "<");
        DEFAULT_ENTITIES.put("gt", ">");
        DEFAULT_ENTITIES.put("amp", "&");
        DEFAULT_ENTITIES.put("apos", "'");
        DEFAULT_ENTITIES.put("quot", "\"");
    }

    // Token types for DTD declarations. peekType() returns these and
    // readInternalSubset() dispatches on them.
    private static final int ELEMENTDECL = 11;
    private static final int ENTITYDECL = 12;
    private static final int ATTLISTDECL = 13;
    private static final int NOTATIONDECL = 14;
    private static final int PARAMETER_ENTITY_REF = 15;

    // Literal character sequences that are matched against the read buffer.
    private static final char[] START_COMMENT = { '<', '!', '-', '-' };
    private static final char[] END_COMMENT = { '-', '-', '>' };
    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
    private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
    private static final char[] END_CDATA = { ']', ']', '>' };
    private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
    private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
    private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
    private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
    private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
    private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
    private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
    private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
    private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
    private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
    private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
    private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
    private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
    private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };

    static final private String UNEXPECTED_EOF = "Unexpected EOF";
    static final private String ILLEGAL_TYPE = "Wrong event type";
    // Internal token type that peekType() returns for a leading "<?xml ".
    static final private int XML_DECLARATION = 998;

    // general
    private String location;

    // Values captured from the XML declaration and the DOCTYPE.
    private String version;
    private Boolean standalone;
    private String rootElementName;
    private String systemId;
    private String publicId;

    /**
     * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
     * entity values and default attribute values. These values are parsed at
     * inclusion time and may contain both tags and entity references.
     *
     * <p>If this is false, the user must {@link #defineEntityReplacementText
     * define entity values manually}. Such entity values are literal strings
     * and will not be parsed. There is no API to define default attributes
     * manually.
     */
    private boolean processDocDecl;
    private boolean processNsp;
    private boolean relaxed;
    private boolean keepNamespaceAttributes;

    /**
     * If non-null, the contents of the read buffer must be copied into this
     * string builder before the read buffer is overwritten. This is used to
     * capture the raw DTD text while parsing the DTD.
     */
    private StringBuilder bufferCapture;

    /**
     * Entities defined in or for this document. This map is created lazily.
     */
    private Map<String, char[]> documentEntities;

    /**
     * Default attributes in this document. The outer map's key is the element
     * name; the inner map's key is the attribute name. Both keys should be
     * without namespace adjustments. This map is created lazily.
     */
    private Map<String, Map<String, String>> defaultAttributes;


    private int depth;
    /*
     * Open elements arranged in groups of 4, as filled in by parseStartTag():
     * i + 0 = namespace
     * i + 1 = prefix
     * i + 2 = name after namespace adjustment
     * i + 3 = name exactly as read from the start tag
     */
    private String[] elementStack = new String[16];
    /*
     * Namespace declarations arranged in pairs, as filled in by adjustNsp():
     * j + 0 = declared prefix (null for a plain xmlns="..." declaration)
     * j + 1 = the declaration's attribute value
     */
    private String[] nspStack = new String[8];
    // nspCounts[d] is the number of nspStack pairs in effect at element depth d.
    private int[] nspCounts = new int[4];

    // source

    private Reader reader;
    private String encoding;
    private ContentSource nextContentSource;
    private char[] buffer = new char[8192];
    private int position = 0;
    private int limit = 0;

    /*
     * Track the number of newlines and columns preceding the current buffer. To
     * compute the line and column of a position in the buffer, compute the line
     * and column in the buffer and add the preceding values.
     */
    private int bufferStartLine;
    private int bufferStartColumn;

    // the current token

    private int type;
    private boolean isWhitespace;
    private String namespace;
    private String prefix;
    private String name;
    private String text;

    // Set by parseStartTag() for a tag closed with "/>"; the following call to
    // next(boolean) then reports END_TAG without reading any input.
    private boolean degenerated;
    private int attributeCount;

    // true iff. we've encountered the START_TAG of an XML element at depth == 0;
    private boolean parsedTopLevelStartTag;

    /*
     * The current element's attributes arranged in groups of 4:
     * i + 0 = attribute namespace URI
     * i + 1 = attribute namespace prefix
     * i + 2 = attribute qualified name (may contain ":", as in "html:h1")
     * i + 3 = attribute value
     */
    private String[] attributes = new String[16];

    // First problem recorded by checkRelaxed() in relaxed mode; next(boolean)
    // clears it, reporting it as a COMMENT token when reading single tokens.
    private String error;

    private boolean unresolved;

    public final StringPool stringPool = new StringPool();

    /**
     * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
     * in pulled elements. Most applications will only be interested in the effective namespaces of
     * their elements, so these attributes aren't useful. But for structure preserving wrappers like
     * DOM, it is necessary to keep the namespace data around.
*/
    public void keepNamespaceAttributes() {
        this.keepNamespaceAttributes = true;
    }

    /**
     * Applies namespace processing to the start tag that was just parsed.
     *
     * <p>Namespace declarations ({@code xmlns} and {@code xmlns:foo}) are
     * pushed onto {@code nspStack} and, unless
     * {@link #keepNamespaceAttributes()} was called, removed from the
     * attribute array. Remaining prefixed attributes are then split into
     * namespace URI, prefix and local name. Finally the element's own name is
     * split and its namespace is looked up.
     *
     * @return true if any attribute had a prefix other than {@code xmlns}, or
     *     if a namespace declaration was retained as an attribute.
     * @throws XmlPullParserException for an empty prefixed namespace, an
     *     illegal tag name or an undefined element prefix, unless relaxed.
     * @throws RuntimeException for an illegal attribute name or an undefined
     *     attribute prefix, unless relaxed.
     */
    private boolean adjustNsp() throws XmlPullParserException {
        boolean any = false;

        // First pass: collect namespace declarations.
        for (int i = 0; i < attributeCount << 2; i += 4) {
            String attrName = attributes[i + 2];
            int cut = attrName.indexOf(':');
            String prefix;

            if (cut != -1) {
                prefix = attrName.substring(0, cut);
                attrName = attrName.substring(cut + 1);
            } else if (attrName.equals("xmlns")) {
                // default namespace declaration; recorded with a null prefix
                prefix = attrName;
                attrName = null;
            } else {
                continue;
            }

            if (!prefix.equals("xmlns")) {
                any = true;
            } else {
                // claim the next (prefix, value) pair for the current depth
                int j = (nspCounts[depth]++) << 1;

                nspStack = ensureCapacity(nspStack, j + 2);
                nspStack[j] = attrName;
                nspStack[j + 1] = attributes[i + 3];

                if (attrName != null && attributes[i + 3].isEmpty()) {
                    checkRelaxed("illegal empty namespace");
                }

                if (keepNamespaceAttributes) {
                    // explicitly set the namespace for unprefixed attributes
                    // such as xmlns="http://foo"
                    attributes[i] = "http://www.w3.org/2000/xmlns/";
                    any = true;
                } else {
                    // drop the declaration by shifting later attributes down
                    System.arraycopy(
                            attributes,
                            i + 4,
                            attributes,
                            i,
                            ((--attributeCount) << 2) - i);

                    // revisit this slot; it now holds the next attribute
                    i -= 4;
                }
            }
        }

        // Second pass: resolve the prefixes of the remaining attributes.
        if (any) {
            for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {

                String attrName = attributes[i + 2];
                int cut = attrName.indexOf(':');

                if (cut == 0 && !relaxed) {
                    throw new RuntimeException(
                            "illegal attribute name: " + attrName + " at " + this);
                } else if (cut != -1) {
                    String attrPrefix = attrName.substring(0, cut);

                    attrName = attrName.substring(cut + 1);

                    String attrNs = getNamespace(attrPrefix);

                    if (attrNs == null && !relaxed) {
                        throw new RuntimeException(
                                "Undefined Prefix: " + attrPrefix + " in " + this);
                    }

                    attributes[i] = attrNs;
                    attributes[i + 1] = attrPrefix;
                    attributes[i + 2] = attrName;
                }
            }
        }

        // Finally split and resolve the element's own name.
        int cut = name.indexOf(':');

        if (cut == 0) {
            checkRelaxed("illegal tag name: " + name);
        }

        if (cut != -1) {
            prefix = name.substring(0, cut);
            name = name.substring(cut + 1);
        }

        this.namespace = getNamespace(prefix);

        if (this.namespace == null) {
            if (prefix != null) {
                checkRelaxed("undefined prefix: " + prefix);
            }
            this.namespace = NO_NAMESPACE;
        }

        return any;
    }

    /**
     * Returns an array with room for at least {@code required} elements.
     *
     * @param arr the current array
     * @param required the minimum length needed
     * @return {@code arr} itself when it is long enough; otherwise a new array
     *     of length {@code required + 16} holding a copy of {@code arr}.
     */
    private String[] ensureCapacity(String[] arr, int required) {
        if (arr.length >= required) {
            return arr;
        }
        String[] bigger = new String[required + 16];
        System.arraycopy(arr, 0, bigger, 0, arr.length);
        return bigger;
    }

    /**
     * Reports a well-formedness problem. Outside relaxed mode this throws;
     * in relaxed mode the first message is remembered in {@code error} and
     * parsing continues.
     *
     * @param errorMessage description of the problem
     * @throws XmlPullParserException if the parser is not in relaxed mode
     */
    private void checkRelaxed(String errorMessage) throws XmlPullParserException {
        if (!relaxed) {
            throw new XmlPullParserException(errorMessage, this, null);
        }
        if (error == null) {
            error = "Error: " + errorMessage;
        }
    }

    /**
     * Advances to the next event by calling {@link #next(boolean)} with
     * {@code justOneToken} set to false.
     */
    public int next() throws XmlPullParserException, IOException {
        return next(false);
    }

    /**
     * Advances to the next token by calling {@link #next(boolean)} with
     * {@code justOneToken} set to true.
     */
    public int nextToken() throws XmlPullParserException, IOException {
        return next(true);
    }

    /**
     * Reads the next event from the input and returns its type.
     *
     * @param justOneToken true to return after every single token, including
     *     comments, processing instructions, the doctype and individual text
     *     pieces; false to skip the former and concatenate adjacent text.
     * @return the type of the event that was read, also stored in {@code type}
     * @throws XmlPullParserException if no input has been set or the document
     *     is malformed
     */
    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
        if (reader == null) {
            throw new XmlPullParserException("setInput() must be called first.", this, null);
        }

        // the previously returned end tag closes its element now
        if (type == END_TAG) {
            depth--;
        }

        // degenerated needs to be handled before error because of possible
        // processor expectations(!)

        if (degenerated) {
            degenerated = false;
            type = END_TAG;
            return type;
        }

        // A problem recorded in relaxed mode is surfaced as a COMMENT token
        // when reading single tokens and silently dropped otherwise.
        if (error != null) {
            if (justOneToken) {
                text = error;
                type = COMMENT;
                error = null;
                return type;
            } else {
                error = null;
            }
        }

        type = peekType(false);

        if (type == XML_DECLARATION) {
            readXmlDeclaration();
            type = peekType(false);
        }

        // reset the state of the current token
        text = null;
        isWhitespace = true;
        prefix = null;
        name = null;
        namespace = null;
        attributeCount = -1;
        boolean throwOnResolveFailure = !justOneToken;

        while (true) {
            switch (type) {

            /*
             * Return immediately after encountering a start tag, end tag, or
             * the end of the document.
             */
            case START_TAG:
                parseStartTag(false, throwOnResolveFailure);
                return type;
            case END_TAG:
                readEndTag();
                return type;
            case END_DOCUMENT:
                return type;

            /*
             * Return after any text token when we're looking for a single
             * token. Otherwise concatenate all text between tags.
             */
            case ENTITY_REF:
                if (justOneToken) {
                    StringBuilder entityTextBuilder = new StringBuilder();
                    readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
                    text = entityTextBuilder.toString();
                    break;
                }
                // fall-through
            case TEXT:
                text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
                if (depth == 0 && isWhitespace) {
                    type = IGNORABLE_WHITESPACE;
                }
                break;
            case CDSECT:
                read(START_CDATA);
                text = readUntil(END_CDATA, true);
                break;

            /*
             * Comments, processing instructions and declarations are returned
             * when we're looking for a single token. Otherwise they're skipped.
             */
            case COMMENT:
                String commentText = readComment(justOneToken);
                if (justOneToken) {
                    text = commentText;
                }
                break;
            case PROCESSING_INSTRUCTION:
                read(START_PROCESSING_INSTRUCTION);
                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
                if (justOneToken) {
                    text = processingInstruction;
                }
                break;
            case DOCDECL:
                readDoctype(justOneToken);
                // a doctype is rejected once a top-level start tag has been seen
                if (parsedTopLevelStartTag) {
                    throw new XmlPullParserException("Unexpected token", this, null);
                }
                break;

            default:
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            // character data is not permitted outside the root element
            if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            if (justOneToken) {
                return type;
            }

            if (type == IGNORABLE_WHITESPACE) {
                text = null;
            }

            /*
             * We've read all that we can of a non-empty text block. Always
             * report this as text, even if it was a CDATA block or entity
             * reference.
             */
            int peek = peekType(false);
            if (text != null && !text.isEmpty() && peek < TEXT) {
                type = TEXT;
                return type;
            }

            type = peek;
        }
    }

    /**
     * Reads text until the specified delimiter is encountered. Consumes the
     * text and the delimiter.
     *
     * @param returnText true to return the read text excluding the delimiter;
     *     false to return null.
464 */ 465 private String readUntil(char[] delimiter, boolean returnText) 466 throws IOException, XmlPullParserException { 467 int start = position; 468 StringBuilder result = null; 469 470 if (returnText && text != null) { 471 result = new StringBuilder(); 472 result.append(text); 473 } 474 475 search: 476 while (true) { 477 if (position + delimiter.length > limit) { 478 if (start < position && returnText) { 479 if (result == null) { 480 result = new StringBuilder(); 481 } 482 result.append(buffer, start, position - start); 483 } 484 if (!fillBuffer(delimiter.length)) { 485 checkRelaxed(UNEXPECTED_EOF); 486 type = COMMENT; 487 return null; 488 } 489 start = position; 490 } 491 492 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 493 // when the VM has better method inlining 494 for (int i = 0; i < delimiter.length; i++) { 495 if (buffer[position + i] != delimiter[i]) { 496 position++; 497 continue search; 498 } 499 } 500 501 break; 502 } 503 504 int end = position; 505 position += delimiter.length; 506 507 if (!returnText) { 508 return null; 509 } else if (result == null) { 510 return stringPool.get(buffer, start, end - start); 511 } else { 512 result.append(buffer, start, end - start); 513 return result.toString(); 514 } 515 } 516 517 /** 518 * Returns true if an XML declaration was read. 
519 */ 520 private void readXmlDeclaration() throws IOException, XmlPullParserException { 521 if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) { 522 checkRelaxed("processing instructions must not start with xml"); 523 } 524 525 read(START_PROCESSING_INSTRUCTION); 526 parseStartTag(true, true); 527 528 if (attributeCount < 1 || !"version".equals(attributes[2])) { 529 checkRelaxed("version expected"); 530 } 531 532 version = attributes[3]; 533 534 int pos = 1; 535 536 if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) { 537 encoding = attributes[3 + 4]; 538 pos++; 539 } 540 541 if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) { 542 String st = attributes[3 + 4 * pos]; 543 if ("yes".equals(st)) { 544 standalone = Boolean.TRUE; 545 } else if ("no".equals(st)) { 546 standalone = Boolean.FALSE; 547 } else { 548 checkRelaxed("illegal standalone value: " + st); 549 } 550 pos++; 551 } 552 553 if (pos != attributeCount) { 554 checkRelaxed("unexpected attributes in XML declaration"); 555 } 556 557 isWhitespace = true; 558 text = null; 559 } 560 561 private String readComment(boolean returnText) throws IOException, XmlPullParserException { 562 read(START_COMMENT); 563 564 if (relaxed) { 565 return readUntil(END_COMMENT, returnText); 566 } 567 568 String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText); 569 if (peekCharacter() != '>') { 570 throw new XmlPullParserException("Comments may not contain --", this, null); 571 } 572 position++; 573 return commentText; 574 } 575 576 /** 577 * Read the document's DTD. Although this parser is non-validating, the DTD 578 * must be parsed to capture entity values and default attribute values. 
579 */ 580 private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException { 581 read(START_DOCTYPE); 582 583 int startPosition = -1; 584 if (saveDtdText) { 585 bufferCapture = new StringBuilder(); 586 startPosition = position; 587 } 588 try { 589 skip(); 590 rootElementName = readName(); 591 readExternalId(true, true); 592 skip(); 593 if (peekCharacter() == '[') { 594 readInternalSubset(); 595 } 596 skip(); 597 } finally { 598 if (saveDtdText) { 599 bufferCapture.append(buffer, 0, position); 600 bufferCapture.delete(0, startPosition); 601 text = bufferCapture.toString(); 602 bufferCapture = null; 603 } 604 } 605 606 read('>'); 607 skip(); 608 } 609 610 /** 611 * Reads an external ID of one of these two forms: 612 * SYSTEM "quoted system name" 613 * PUBLIC "quoted public id" "quoted system name" 614 * 615 * If the system name is not required, this also supports lone public IDs of 616 * this form: 617 * PUBLIC "quoted public id" 618 * 619 * Returns true if any ID was read. 620 */ 621 private boolean readExternalId(boolean requireSystemName, boolean assignFields) 622 throws IOException, XmlPullParserException { 623 skip(); 624 int c = peekCharacter(); 625 626 if (c == 'S') { 627 read(SYSTEM); 628 } else if (c == 'P') { 629 read(PUBLIC); 630 skip(); 631 if (assignFields) { 632 publicId = readQuotedId(true); 633 } else { 634 readQuotedId(false); 635 } 636 } else { 637 return false; 638 } 639 640 skip(); 641 642 if (!requireSystemName) { 643 int delimiter = peekCharacter(); 644 if (delimiter != '"' && delimiter != '\'') { 645 return true; // no system name! 646 } 647 } 648 649 if (assignFields) { 650 systemId = readQuotedId(true); 651 } else { 652 readQuotedId(false); 653 } 654 return true; 655 } 656 657 private static final char[] SINGLE_QUOTE = new char[] { '\'' }; 658 private static final char[] DOUBLE_QUOTE = new char[] { '"' }; 659 660 /** 661 * Reads a quoted string, performing no entity escaping of the contents. 
662 */ 663 private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException { 664 int quote = peekCharacter(); 665 char[] delimiter; 666 if (quote == '"') { 667 delimiter = DOUBLE_QUOTE; 668 } else if (quote == '\'') { 669 delimiter = SINGLE_QUOTE; 670 } else { 671 throw new XmlPullParserException("Expected a quoted string", this, null); 672 } 673 position++; 674 return readUntil(delimiter, returnText); 675 } 676 677 private void readInternalSubset() throws IOException, XmlPullParserException { 678 read('['); 679 680 while (true) { 681 skip(); 682 if (peekCharacter() == ']') { 683 position++; 684 return; 685 } 686 687 int declarationType = peekType(true); 688 switch (declarationType) { 689 case ELEMENTDECL: 690 readElementDeclaration(); 691 break; 692 693 case ATTLISTDECL: 694 readAttributeListDeclaration(); 695 break; 696 697 case ENTITYDECL: 698 readEntityDeclaration(); 699 break; 700 701 case NOTATIONDECL: 702 readNotationDeclaration(); 703 break; 704 705 case PROCESSING_INSTRUCTION: 706 read(START_PROCESSING_INSTRUCTION); 707 readUntil(END_PROCESSING_INSTRUCTION, false); 708 break; 709 710 case COMMENT: 711 readComment(false); 712 break; 713 714 case PARAMETER_ENTITY_REF: 715 throw new XmlPullParserException( 716 "Parameter entity references are not supported", this, null); 717 718 default: 719 throw new XmlPullParserException("Unexpected token", this, null); 720 } 721 } 722 } 723 724 /** 725 * Read an element declaration. This contains a name and a content spec. 726 * <!ELEMENT foo EMPTY > 727 * <!ELEMENT foo (bar?,(baz|quux)) > 728 * <!ELEMENT foo (#PCDATA|bar)* > 729 */ 730 private void readElementDeclaration() throws IOException, XmlPullParserException { 731 read(START_ELEMENT); 732 skip(); 733 readName(); 734 readContentSpec(); 735 skip(); 736 read('>'); 737 } 738 739 /** 740 * Read an element content spec. This is a regular expression-like pattern 741 * of names or other content specs. 
The following operators are supported: 742 * sequence: (a,b,c) 743 * choice: (a|b|c) 744 * optional: a? 745 * one or more: a+ 746 * any number: a* 747 * 748 * The special name '#PCDATA' is permitted but only if it is the first 749 * element of the first group: 750 * (#PCDATA|a|b) 751 * 752 * The top-level element must be either a choice, a sequence, or one of the 753 * special names EMPTY and ANY. 754 */ 755 private void readContentSpec() throws IOException, XmlPullParserException { 756 // this implementation is very lenient; it scans for balanced parens only 757 skip(); 758 int c = peekCharacter(); 759 if (c == '(') { 760 int depth = 0; 761 do { 762 if (c == '(') { 763 depth++; 764 } else if (c == ')') { 765 depth--; 766 } else if (c == -1) { 767 throw new XmlPullParserException( 768 "Unterminated element content spec", this, null); 769 } 770 position++; 771 c = peekCharacter(); 772 } while (depth > 0); 773 774 if (c == '*' || c == '?' || c == '+') { 775 position++; 776 } 777 } else if (c == EMPTY[0]) { 778 read(EMPTY); 779 } else if (c == ANY[0]) { 780 read(ANY); 781 } else { 782 throw new XmlPullParserException("Expected element content spec", this, null); 783 } 784 } 785 786 /** 787 * Reads an attribute list declaration such as the following: 788 * <!ATTLIST foo 789 * bar CDATA #IMPLIED 790 * quux (a|b|c) "c" 791 * baz NOTATION (a|b|c) #FIXED "c"> 792 * 793 * Each attribute has a name, type and default. 794 * 795 * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY, 796 * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)" 797 * or NOTATION followed by an enumerated type. 798 * 799 * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or 800 * #FIXED with a quoted value. 
801 */ 802 private void readAttributeListDeclaration() throws IOException, XmlPullParserException { 803 read(START_ATTLIST); 804 skip(); 805 String elementName = readName(); 806 807 while (true) { 808 skip(); 809 int c = peekCharacter(); 810 if (c == '>') { 811 position++; 812 return; 813 } 814 815 // attribute name 816 String attributeName = readName(); 817 818 // attribute type 819 skip(); 820 if (position + 1 >= limit && !fillBuffer(2)) { 821 throw new XmlPullParserException("Malformed attribute list", this, null); 822 } 823 if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) { 824 read(NOTATION); 825 skip(); 826 } 827 c = peekCharacter(); 828 if (c == '(') { 829 position++; 830 while (true) { 831 skip(); 832 readName(); 833 skip(); 834 c = peekCharacter(); 835 if (c == ')') { 836 position++; 837 break; 838 } else if (c == '|') { 839 position++; 840 } else { 841 throw new XmlPullParserException("Malformed attribute type", this, null); 842 } 843 } 844 } else { 845 readName(); 846 } 847 848 // default value 849 skip(); 850 c = peekCharacter(); 851 if (c == '#') { 852 position++; 853 c = peekCharacter(); 854 if (c == 'R') { 855 read(REQUIRED); 856 } else if (c == 'I') { 857 read(IMPLIED); 858 } else if (c == 'F') { 859 read(FIXED); 860 } else { 861 throw new XmlPullParserException("Malformed attribute type", this, null); 862 } 863 skip(); 864 c = peekCharacter(); 865 } 866 if (c == '"' || c == '\'') { 867 position++; 868 // TODO: does this do escaping correctly? 
869 String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE); 870 if (peekCharacter() == c) { 871 position++; 872 } 873 defineAttributeDefault(elementName, attributeName, value); 874 } 875 } 876 } 877 878 private void defineAttributeDefault(String elementName, String attributeName, String value) { 879 if (defaultAttributes == null) { 880 defaultAttributes = new HashMap<String, Map<String, String>>(); 881 } 882 Map<String, String> elementAttributes = defaultAttributes.get(elementName); 883 if (elementAttributes == null) { 884 elementAttributes = new HashMap<String, String>(); 885 defaultAttributes.put(elementName, elementAttributes); 886 } 887 elementAttributes.put(attributeName, value); 888 } 889 890 /** 891 * Read an entity declaration. The value of internal entities are inline: 892 * <!ENTITY foo "bar"> 893 * 894 * The values of external entities must be retrieved by URL or path: 895 * <!ENTITY foo SYSTEM "http://host/file"> 896 * <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file"> 897 * <!ENTITY foo SYSTEM "../file.png" NDATA png> 898 * 899 * Entities may be general or parameterized. Parameterized entities are 900 * marked by a percent sign. Such entities may only be used in the DTD: 901 * <!ENTITY % foo "bar"> 902 */ 903 private void readEntityDeclaration() throws IOException, XmlPullParserException { 904 read(START_ENTITY); 905 boolean generalEntity = true; 906 907 skip(); 908 if (peekCharacter() == '%') { 909 generalEntity = false; 910 position++; 911 skip(); 912 } 913 914 String name = readName(); 915 916 skip(); 917 int quote = peekCharacter(); 918 String entityValue; 919 if (quote == '"' || quote == '\'') { 920 position++; 921 entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION); 922 if (peekCharacter() == quote) { 923 position++; 924 } 925 } else if (readExternalId(true, false)) { 926 /* 927 * Map external entities to the empty string. 
This is dishonest, 928 * but it's consistent with Android's Expat pull parser. 929 */ 930 entityValue = ""; 931 skip(); 932 if (peekCharacter() == NDATA[0]) { 933 read(NDATA); 934 skip(); 935 readName(); 936 } 937 } else { 938 throw new XmlPullParserException("Expected entity value or external ID", this, null); 939 } 940 941 if (generalEntity && processDocDecl) { 942 if (documentEntities == null) { 943 documentEntities = new HashMap<String, char[]>(); 944 } 945 documentEntities.put(name, entityValue.toCharArray()); 946 } 947 948 skip(); 949 read('>'); 950 } 951 952 private void readNotationDeclaration() throws IOException, XmlPullParserException { 953 read(START_NOTATION); 954 skip(); 955 readName(); 956 if (!readExternalId(false, false)) { 957 throw new XmlPullParserException( 958 "Expected external ID or public ID for notation", this, null); 959 } 960 skip(); 961 read('>'); 962 } 963 964 private void readEndTag() throws IOException, XmlPullParserException { 965 read('<'); 966 read('/'); 967 name = readName(); // TODO: pass the expected name in as a hint? 968 skip(); 969 read('>'); 970 971 int sp = (depth - 1) * 4; 972 973 if (depth == 0) { 974 checkRelaxed("read end tag " + name + " with no tags open"); 975 type = COMMENT; 976 return; 977 } 978 979 if (name.equals(elementStack[sp + 3])) { 980 namespace = elementStack[sp]; 981 prefix = elementStack[sp + 1]; 982 name = elementStack[sp + 2]; 983 } else if (!relaxed) { 984 throw new XmlPullParserException( 985 "expected: /" + elementStack[sp + 3] + " read: " + name, this, null); 986 } 987 } 988 989 /** 990 * Returns the type of the next token. 
*/
    private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
        // nothing is consumed here; the type is decided from the upcoming characters
        if (position >= limit && !fillBuffer(1)) {
            return END_DOCUMENT;
        }

        switch (buffer[position]) {
        case '&':
            return ENTITY_REF; // &
        case '<':
            // the checks below look at up to four characters starting at '<'
            if (position + 3 >= limit && !fillBuffer(4)) {
                throw new XmlPullParserException("Dangling <", this, null);
            }

            switch (buffer[position + 1]) {
            case '/':
                return END_TAG; // </
            case '?':
                // we're looking for "<?xml " with case insensitivity
                if ((position + 5 < limit || fillBuffer(6))
                        && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
                        && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
                        && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
                        && (buffer[position + 5] == ' ')) {
                    return XML_DECLARATION; // <?xml
                } else {
                    return PROCESSING_INSTRUCTION; // <?
                }
            case '!':
                switch (buffer[position + 2]) {
                case 'D':
                    return DOCDECL; // <!D
                case '[':
                    return CDSECT; // <![
                case '-':
                    return COMMENT; // <!-
                case 'E':
                    switch (buffer[position + 3]) {
                    case 'L':
                        return ELEMENTDECL; // <!EL
                    case 'N':
                        return ENTITYDECL; // <!EN
                    }
                    // any other "<!E" falls out to the exception below
                    break;
                case 'A':
                    return ATTLISTDECL; // <!A
                case 'N':
                    return NOTATIONDECL; // <!N
                }
                throw new XmlPullParserException("Unexpected <!", this, null);
            default:
                return START_TAG; // <
            }
        case '%':
            // '%' is only special when peeking inside the DTD's internal subset
            return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
        default:
            return TEXT;
        }
    }

    /**
     * Sets name and attributes
     *
     * <p>For an ordinary start tag this also pushes the element onto
     * {@code elementStack}, applies namespace processing when enabled and
     * appends default attributes declared for the element.
     *
     * @param xmldecl true when reading the pseudo-attributes of an XML
     *     declaration. The leading '&lt;' is then not consumed and the method
     *     returns at "?&gt;" without touching the element stack.
     * @param throwOnResolveFailure passed through to {@code readValue} when
     *     reading attribute values
     */
    private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
            throws IOException, XmlPullParserException {
        if (!xmldecl) {
            read('<');
        }
        name = readName();
        attributeCount = 0;

        while (true) {
            skip();

            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            int c = buffer[position];

            if (xmldecl) {
                if (c == '?') {
                    position++;
                    read('>');
                    return;
                }
            } else {
                if (c == '/') {
                    // empty-element tag: remember to report END_TAG next
                    degenerated = true;
                    position++;
                    skip();
                    read('>');
                    break;
                } else if (c == '>') {
                    position++;
                    break;
                }
            }

            String attrName = readName();

            // claim the next group of four slots for this attribute
            int i = (attributeCount++) * 4;
            attributes = ensureCapacity(attributes, i + 4);
            attributes[i] = "";
            attributes[i + 1] = null;
            attributes[i + 2] = attrName;

            skip();
            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            if (buffer[position] == '=') {
                position++;

                skip();
                if (position >= limit && !fillBuffer(1)) {
                    checkRelaxed(UNEXPECTED_EOF);
                    return;
                }
                char delimiter = buffer[position];

                if (delimiter == '\'' || delimiter == '"') {
                    position++;
                } else if (relaxed) {
                    // unquoted value: a space is passed as the delimiter
                    delimiter = ' ';
                } else {
                    throw new XmlPullParserException("attr value delimiter missing!", this, null);
                }

                attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
                        ValueContext.ATTRIBUTE);

                if (delimiter != ' ' && peekCharacter() == delimiter) {
                    position++; // end quote
                }
            } else if (relaxed) {
                // an attribute without a value gets its own name as the value
                attributes[i + 3] = attrName;
            } else {
                checkRelaxed("Attr.value missing f. " + attrName);
                attributes[i + 3] = attrName;
            }
        }

        // push the element; slot sp + 3 keeps the name exactly as it was read
        int sp = depth++ * 4;
        if (depth == 1) {
            parsedTopLevelStartTag = true;
        }
        elementStack = ensureCapacity(elementStack, sp + 4);
        elementStack[sp + 3] = name;

        if (depth >= nspCounts.length) {
            int[] bigger = new int[depth + 4];
            System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
            nspCounts = bigger;
        }

        // start with the same number of namespace declarations as the parent
        nspCounts[depth] = nspCounts[depth - 1];

        if (processNsp) {
            adjustNsp();
        } else {
            namespace = "";
        }

        // For consistency with Expat, add default attributes after fixing namespaces.
        if (defaultAttributes != null) {
            Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
            if (elementDefaultAttributes != null) {
                for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
                    if (getAttributeValue(null, entry.getKey()) != null) {
                        continue; // an explicit value overrides the default
                    }

                    int i = (attributeCount++) * 4;
                    attributes = ensureCapacity(attributes, i + 4);
                    attributes[i] = "";
                    attributes[i + 1] = null;
                    attributes[i + 2] = entry.getKey();
                    attributes[i + 3] = entry.getValue();
                }
            }
        }

        elementStack[sp] = namespace;
        elementStack[sp + 1] = prefix;
        elementStack[sp + 2] = name;
    }

    /**
     * Reads an entity reference from the buffer, resolves it, and writes the
     * resolved entity to {@code out}. If the entity cannot be read or resolved,
     * {@code out} will contain the partial entity reference.
1186 */ 1187 private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure, 1188 ValueContext valueContext) throws IOException, XmlPullParserException { 1189 int start = out.length(); 1190 1191 if (buffer[position++] != '&') { 1192 throw new AssertionError(); 1193 } 1194 1195 out.append('&'); 1196 1197 while (true) { 1198 int c = peekCharacter(); 1199 1200 if (c == ';') { 1201 out.append(';'); 1202 position++; 1203 break; 1204 1205 } else if (c >= 128 1206 || (c >= '0' && c <= '9') 1207 || (c >= 'a' && c <= 'z') 1208 || (c >= 'A' && c <= 'Z') 1209 || c == '_' 1210 || c == '-' 1211 || c == '#') { 1212 position++; 1213 out.append((char) c); 1214 1215 } else if (relaxed) { 1216 // intentionally leave the partial reference in 'out' 1217 return; 1218 1219 } else { 1220 throw new XmlPullParserException("unterminated entity ref", this, null); 1221 } 1222 } 1223 1224 String code = out.substring(start + 1, out.length() - 1); 1225 1226 if (isEntityToken) { 1227 name = code; 1228 } 1229 1230 if (code.startsWith("#")) { 1231 try { 1232 int c = code.startsWith("#x") 1233 ? 
Integer.parseInt(code.substring(2), 16) 1234 : Integer.parseInt(code.substring(1)); 1235 out.delete(start, out.length()); 1236 out.appendCodePoint(c); 1237 unresolved = false; 1238 return; 1239 } catch (NumberFormatException notANumber) { 1240 throw new XmlPullParserException("Invalid character reference: &" + code); 1241 } catch (IllegalArgumentException invalidCodePoint) { 1242 throw new XmlPullParserException("Invalid character reference: &" + code); 1243 } 1244 } 1245 1246 if (valueContext == ValueContext.ENTITY_DECLARATION) { 1247 // keep the unresolved &code; in the text to resolve later 1248 return; 1249 } 1250 1251 String defaultEntity = DEFAULT_ENTITIES.get(code); 1252 if (defaultEntity != null) { 1253 out.delete(start, out.length()); 1254 unresolved = false; 1255 out.append(defaultEntity); 1256 return; 1257 } 1258 1259 char[] resolved; 1260 if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { 1261 out.delete(start, out.length()); 1262 unresolved = false; 1263 if (processDocDecl) { 1264 pushContentSource(resolved); // parse the entity as XML 1265 } else { 1266 out.append(resolved); // include the entity value as text 1267 } 1268 return; 1269 } 1270 1271 /* 1272 * The parser skipped an external DTD, and now we've encountered an 1273 * unknown entity that could have been declared there. Map it to the 1274 * empty string. This is dishonest, but it's consistent with Android's 1275 * old ExpatPullParser. 1276 */ 1277 if (systemId != null) { 1278 out.delete(start, out.length()); 1279 return; 1280 } 1281 1282 // keep the unresolved entity "&code;" in the text for relaxed clients 1283 unresolved = true; 1284 if (throwOnResolveFailure) { 1285 checkRelaxed("unresolved: &" + code + ";"); 1286 } 1287 } 1288 1289 /** 1290 * Where a value is found impacts how that value is interpreted. For 1291 * example, in attributes, "\n" must be replaced with a space character. In 1292 * text, "]]>" is forbidden. 
In entity declarations, named references are 1293 * not resolved. 1294 */ 1295 enum ValueContext { 1296 ATTRIBUTE, 1297 TEXT, 1298 ENTITY_DECLARATION 1299 } 1300 1301 /** 1302 * Returns the current text or attribute value. This also has the side 1303 * effect of setting isWhitespace to false if a non-whitespace character is 1304 * encountered. 1305 * 1306 * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted 1307 * attributes, or a space for unquoted attributes. 1308 */ 1309 private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure, 1310 ValueContext valueContext) throws IOException, XmlPullParserException { 1311 1312 /* 1313 * This method returns all of the characters from the current position 1314 * through to an appropriate delimiter. 1315 * 1316 * If we're lucky (which we usually are), we'll return a single slice of 1317 * the buffer. This fast path avoids allocating a string builder. 1318 * 1319 * There are 6 unlucky characters we could encounter: 1320 * - "&": entities must be resolved. 1321 * - "%": parameter entities are unsupported in entity values. 1322 * - "<": this isn't permitted in attributes unless relaxed. 1323 * - "]": this requires a lookahead to defend against the forbidden 1324 * CDATA section delimiter "]]>". 1325 * - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it 1326 * isn't followed by "\n", we replace "\r" with either a "\n" 1327 * in text nodes or a space in attribute values. 1328 * - "\n": In attribute values, "\n" must be replaced with a space. 1329 * 1330 * We could also get unlucky by needing to refill the buffer midway 1331 * through the text. 
1332 */ 1333 1334 int start = position; 1335 StringBuilder result = null; 1336 1337 // if a text section was already started, prefix the start 1338 if (valueContext == ValueContext.TEXT && text != null) { 1339 result = new StringBuilder(); 1340 result.append(text); 1341 } 1342 1343 while (true) { 1344 1345 /* 1346 * Make sure we have at least a single character to read from the 1347 * buffer. This mutates the buffer, so save the partial result 1348 * to the slow path string builder first. 1349 */ 1350 if (position >= limit) { 1351 if (start < position) { 1352 if (result == null) { 1353 result = new StringBuilder(); 1354 } 1355 result.append(buffer, start, position - start); 1356 } 1357 if (!fillBuffer(1)) { 1358 return result != null ? result.toString() : ""; 1359 } 1360 start = position; 1361 } 1362 1363 char c = buffer[position]; 1364 1365 if (c == delimiter 1366 || (delimiter == ' ' && (c <= ' ' || c == '>')) 1367 || c == '&' && !resolveEntities) { 1368 break; 1369 } 1370 1371 if (c != '\r' 1372 && (c != '\n' || valueContext != ValueContext.ATTRIBUTE) 1373 && c != '&' 1374 && c != '<' 1375 && (c != ']' || valueContext != ValueContext.TEXT) 1376 && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) { 1377 isWhitespace &= (c <= ' '); 1378 position++; 1379 continue; 1380 } 1381 1382 /* 1383 * We've encountered an unlucky character! Convert from fast 1384 * path to slow path if we haven't done so already. 1385 */ 1386 if (result == null) { 1387 result = new StringBuilder(); 1388 } 1389 result.append(buffer, start, position - start); 1390 1391 if (c == '\r') { 1392 if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') { 1393 position++; 1394 } 1395 c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n'; 1396 1397 } else if (c == '\n') { 1398 c = ' '; 1399 1400 } else if (c == '&') { 1401 isWhitespace = false; // TODO: what if the entity resolves to whitespace? 
1402 readEntity(result, false, throwOnResolveFailure, valueContext); 1403 start = position; 1404 continue; 1405 1406 } else if (c == '<') { 1407 if (valueContext == ValueContext.ATTRIBUTE) { 1408 checkRelaxed("Illegal: \"<\" inside attribute value"); 1409 } 1410 isWhitespace = false; 1411 1412 } else if (c == ']') { 1413 if ((position + 2 < limit || fillBuffer(3)) 1414 && buffer[position + 1] == ']' && buffer[position + 2] == '>') { 1415 checkRelaxed("Illegal: \"]]>\" outside CDATA section"); 1416 } 1417 isWhitespace = false; 1418 1419 } else if (c == '%') { 1420 throw new XmlPullParserException("This parser doesn't support parameter entities", 1421 this, null); 1422 1423 } else { 1424 throw new AssertionError(); 1425 } 1426 1427 position++; 1428 result.append(c); 1429 start = position; 1430 } 1431 1432 if (result == null) { 1433 return stringPool.get(buffer, start, position - start); 1434 } else { 1435 result.append(buffer, start, position - start); 1436 return result.toString(); 1437 } 1438 } 1439 1440 private void read(char expected) throws IOException, XmlPullParserException { 1441 int c = peekCharacter(); 1442 if (c != expected) { 1443 checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'"); 1444 if (c == -1) { 1445 return; // On EOF, don't move position beyond limit 1446 } 1447 } 1448 position++; 1449 } 1450 1451 private void read(char[] chars) throws IOException, XmlPullParserException { 1452 if (position + chars.length > limit && !fillBuffer(chars.length)) { 1453 checkRelaxed("expected: '" + new String(chars) + "' but was EOF"); 1454 return; 1455 } 1456 1457 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 1458 // when the VM has better method inlining 1459 for (int i = 0; i < chars.length; i++) { 1460 if (buffer[position + i] != chars[i]) { 1461 checkRelaxed("expected: \"" + new String(chars) + "\" but was \"" 1462 + new String(buffer, position, chars.length) + "...\""); 1463 } 1464 } 1465 1466 
position += chars.length; 1467 } 1468 1469 private int peekCharacter() throws IOException, XmlPullParserException { 1470 if (position < limit || fillBuffer(1)) { 1471 return buffer[position]; 1472 } 1473 return -1; 1474 } 1475 1476 /** 1477 * Returns true once {@code limit - position >= minimum}. If the data is 1478 * exhausted before that many characters are available, this returns 1479 * false. 1480 */ 1481 private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException { 1482 // If we've exhausted the current content source, remove it 1483 while (nextContentSource != null) { 1484 if (position < limit) { 1485 throw new XmlPullParserException("Unbalanced entity!", this, null); 1486 } 1487 popContentSource(); 1488 if (limit - position >= minimum) { 1489 return true; 1490 } 1491 } 1492 1493 // Before clobbering the old characters, update where buffer starts 1494 for (int i = 0; i < position; i++) { 1495 if (buffer[i] == '\n') { 1496 bufferStartLine++; 1497 bufferStartColumn = 0; 1498 } else { 1499 bufferStartColumn++; 1500 } 1501 } 1502 1503 if (bufferCapture != null) { 1504 bufferCapture.append(buffer, 0, position); 1505 } 1506 1507 if (limit != position) { 1508 limit -= position; 1509 System.arraycopy(buffer, position, buffer, 0, limit); 1510 } else { 1511 limit = 0; 1512 } 1513 1514 position = 0; 1515 int total; 1516 while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) { 1517 limit += total; 1518 if (limit >= minimum) { 1519 return true; 1520 } 1521 } 1522 return false; 1523 } 1524 1525 /** 1526 * Returns an element or attribute name. This is always non-empty for 1527 * non-relaxed parsers. 
1528 */ 1529 private String readName() throws IOException, XmlPullParserException { 1530 if (position >= limit && !fillBuffer(1)) { 1531 checkRelaxed("name expected"); 1532 return ""; 1533 } 1534 1535 int start = position; 1536 StringBuilder result = null; 1537 1538 // read the first character 1539 char c = buffer[position]; 1540 if ((c >= 'a' && c <= 'z') 1541 || (c >= 'A' && c <= 'Z') 1542 || c == '_' 1543 || c == ':' 1544 || c >= '\u00c0' // TODO: check the XML spec 1545 || relaxed) { 1546 position++; 1547 } else { 1548 checkRelaxed("name expected"); 1549 return ""; 1550 } 1551 1552 while (true) { 1553 /* 1554 * Make sure we have at least a single character to read from the 1555 * buffer. This mutates the buffer, so save the partial result 1556 * to the slow path string builder first. 1557 */ 1558 if (position >= limit) { 1559 if (result == null) { 1560 result = new StringBuilder(); 1561 } 1562 result.append(buffer, start, position - start); 1563 if (!fillBuffer(1)) { 1564 return result.toString(); 1565 } 1566 start = position; 1567 } 1568 1569 // read another character 1570 c = buffer[position]; 1571 if ((c >= 'a' && c <= 'z') 1572 || (c >= 'A' && c <= 'Z') 1573 || (c >= '0' && c <= '9') 1574 || c == '_' 1575 || c == '-' 1576 || c == ':' 1577 || c == '.' 1578 || c >= '\u00b7') { // TODO: check the XML spec 1579 position++; 1580 continue; 1581 } 1582 1583 // we encountered a non-name character. done! 1584 if (result == null) { 1585 return stringPool.get(buffer, start, position - start); 1586 } else { 1587 result.append(buffer, start, position - start); 1588 return result.toString(); 1589 } 1590 } 1591 } 1592 1593 private void skip() throws IOException, XmlPullParserException { 1594 while (position < limit || fillBuffer(1)) { 1595 int c = buffer[position]; 1596 if (c > ' ') { 1597 break; 1598 } 1599 position++; 1600 } 1601 } 1602 1603 // public part starts here... 
1604 1605 public void setInput(Reader reader) throws XmlPullParserException { 1606 this.reader = reader; 1607 1608 type = START_DOCUMENT; 1609 parsedTopLevelStartTag = false; 1610 name = null; 1611 namespace = null; 1612 degenerated = false; 1613 attributeCount = -1; 1614 encoding = null; 1615 version = null; 1616 standalone = null; 1617 1618 if (reader == null) { 1619 return; 1620 } 1621 1622 position = 0; 1623 limit = 0; 1624 bufferStartLine = 0; 1625 bufferStartColumn = 0; 1626 depth = 0; 1627 documentEntities = null; 1628 } 1629 1630 public void setInput(InputStream is, String charset) throws XmlPullParserException { 1631 position = 0; 1632 limit = 0; 1633 boolean detectCharset = (charset == null); 1634 1635 if (is == null) { 1636 throw new IllegalArgumentException("is == null"); 1637 } 1638 1639 try { 1640 if (detectCharset) { 1641 // read the four bytes looking for an indication of the encoding in use 1642 int firstFourBytes = 0; 1643 while (limit < 4) { 1644 int i = is.read(); 1645 if (i == -1) { 1646 break; 1647 } 1648 firstFourBytes = (firstFourBytes << 8) | i; 1649 buffer[limit++] = (char) i; 1650 } 1651 1652 if (limit == 4) { 1653 switch (firstFourBytes) { 1654 case 0x00000FEFF: // UTF-32BE BOM 1655 charset = "UTF-32BE"; 1656 limit = 0; 1657 break; 1658 1659 case 0x0FFFE0000: // UTF-32LE BOM 1660 charset = "UTF-32LE"; 1661 limit = 0; 1662 break; 1663 1664 case 0x0000003c: // '<' in UTF-32BE 1665 charset = "UTF-32BE"; 1666 buffer[0] = '<'; 1667 limit = 1; 1668 break; 1669 1670 case 0x03c000000: // '<' in UTF-32LE 1671 charset = "UTF-32LE"; 1672 buffer[0] = '<'; 1673 limit = 1; 1674 break; 1675 1676 case 0x0003c003f: // "<?" in UTF-16BE 1677 charset = "UTF-16BE"; 1678 buffer[0] = '<'; 1679 buffer[1] = '?'; 1680 limit = 2; 1681 break; 1682 1683 case 0x03c003f00: // "<?" in UTF-16LE 1684 charset = "UTF-16LE"; 1685 buffer[0] = '<'; 1686 buffer[1] = '?'; 1687 limit = 2; 1688 break; 1689 1690 case 0x03c3f786d: // "<?xm" in ASCII etc. 
1691 while (true) { 1692 int i = is.read(); 1693 if (i == -1) { 1694 break; 1695 } 1696 buffer[limit++] = (char) i; 1697 if (i == '>') { 1698 String s = new String(buffer, 0, limit); 1699 int i0 = s.indexOf("encoding"); 1700 if (i0 != -1) { 1701 while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') { 1702 i0++; 1703 } 1704 char deli = s.charAt(i0++); 1705 int i1 = s.indexOf(deli, i0); 1706 charset = s.substring(i0, i1); 1707 } 1708 break; 1709 } 1710 } 1711 break; 1712 1713 default: 1714 // handle a byte order mark followed by something other than <? 1715 if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) { 1716 charset = "UTF-16BE"; 1717 buffer[0] = (char) ((buffer[2] << 8) | buffer[3]); 1718 limit = 1; 1719 } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) { 1720 charset = "UTF-16LE"; 1721 buffer[0] = (char) ((buffer[3] << 8) | buffer[2]); 1722 limit = 1; 1723 } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) { 1724 charset = "UTF-8"; 1725 buffer[0] = buffer[3]; 1726 limit = 1; 1727 } 1728 } 1729 } 1730 } 1731 1732 if (charset == null) { 1733 charset = "UTF-8"; 1734 } 1735 1736 int savedLimit = limit; 1737 setInput(new InputStreamReader(is, charset)); 1738 encoding = charset; 1739 limit = savedLimit; 1740 1741 /* 1742 * Skip the optional BOM if we didn't above. This decrements limit 1743 * rather than incrementing position so that <?xml version='1.0'?> 1744 * is still at character 0. 
1745 */ 1746 if (!detectCharset && peekCharacter() == 0xfeff) { 1747 limit--; 1748 System.arraycopy(buffer, 1, buffer, 0, limit); 1749 } 1750 } catch (Exception e) { 1751 throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e); 1752 } 1753 } 1754 1755 public void close() throws IOException { 1756 if (reader != null) { 1757 reader.close(); 1758 } 1759 } 1760 1761 public boolean getFeature(String feature) { 1762 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 1763 return processNsp; 1764 } else if (FEATURE_RELAXED.equals(feature)) { 1765 return relaxed; 1766 } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { 1767 return processDocDecl; 1768 } else { 1769 return false; 1770 } 1771 } 1772 1773 public String getInputEncoding() { 1774 return encoding; 1775 } 1776 1777 public void defineEntityReplacementText(String entity, String value) 1778 throws XmlPullParserException { 1779 if (processDocDecl) { 1780 throw new IllegalStateException( 1781 "Entity replacement text may not be defined with DOCTYPE processing enabled."); 1782 } 1783 if (reader == null) { 1784 throw new IllegalStateException( 1785 "Entity replacement text must be defined after setInput()"); 1786 } 1787 if (documentEntities == null) { 1788 documentEntities = new HashMap<String, char[]>(); 1789 } 1790 documentEntities.put(entity, value.toCharArray()); 1791 } 1792 1793 public Object getProperty(String property) { 1794 if (property.equals(PROPERTY_XMLDECL_VERSION)) { 1795 return version; 1796 } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { 1797 return standalone; 1798 } else if (property.equals(PROPERTY_LOCATION)) { 1799 return location != null ? location : reader.toString(); 1800 } else { 1801 return null; 1802 } 1803 } 1804 1805 /** 1806 * Returns the root element's name if it was declared in the DTD. This 1807 * equals the first tag's name for valid documents. 
1808 */ 1809 public String getRootElementName() { 1810 return rootElementName; 1811 } 1812 1813 /** 1814 * Returns the document's system ID if it was declared. This is typically a 1815 * string like {@code http://www.w3.org/TR/html4/strict.dtd}. 1816 */ 1817 public String getSystemId() { 1818 return systemId; 1819 } 1820 1821 /** 1822 * Returns the document's public ID if it was declared. This is typically a 1823 * string like {@code -//W3C//DTD HTML 4.01//EN}. 1824 */ 1825 public String getPublicId() { 1826 return publicId; 1827 } 1828 1829 public int getNamespaceCount(int depth) { 1830 if (depth > this.depth) { 1831 throw new IndexOutOfBoundsException(); 1832 } 1833 return nspCounts[depth]; 1834 } 1835 1836 public String getNamespacePrefix(int pos) { 1837 return nspStack[pos * 2]; 1838 } 1839 1840 public String getNamespaceUri(int pos) { 1841 return nspStack[(pos * 2) + 1]; 1842 } 1843 1844 public String getNamespace(String prefix) { 1845 if ("xml".equals(prefix)) { 1846 return "http://www.w3.org/XML/1998/namespace"; 1847 } 1848 if ("xmlns".equals(prefix)) { 1849 return "http://www.w3.org/2000/xmlns/"; 1850 } 1851 1852 for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) { 1853 if (prefix == null) { 1854 if (nspStack[i] == null) { 1855 return nspStack[i + 1]; 1856 } 1857 } else if (prefix.equals(nspStack[i])) { 1858 return nspStack[i + 1]; 1859 } 1860 } 1861 return null; 1862 } 1863 1864 public int getDepth() { 1865 return depth; 1866 } 1867 1868 public String getPositionDescription() { 1869 StringBuilder buf = new StringBuilder(type < TYPES.length ? 
TYPES[type] : "unknown"); 1870 buf.append(' '); 1871 1872 if (type == START_TAG || type == END_TAG) { 1873 if (degenerated) { 1874 buf.append("(empty) "); 1875 } 1876 buf.append('<'); 1877 if (type == END_TAG) { 1878 buf.append('/'); 1879 } 1880 1881 if (prefix != null) { 1882 buf.append("{" + namespace + "}" + prefix + ":"); 1883 } 1884 buf.append(name); 1885 1886 int cnt = attributeCount * 4; 1887 for (int i = 0; i < cnt; i += 4) { 1888 buf.append(' '); 1889 if (attributes[i + 1] != null) { 1890 buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":"); 1891 } 1892 buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'"); 1893 } 1894 1895 buf.append('>'); 1896 } else if (type == IGNORABLE_WHITESPACE) { 1897 ; 1898 } else if (type != TEXT) { 1899 buf.append(getText()); 1900 } else if (isWhitespace) { 1901 buf.append("(whitespace)"); 1902 } else { 1903 String text = getText(); 1904 if (text.length() > 16) { 1905 text = text.substring(0, 16) + "..."; 1906 } 1907 buf.append(text); 1908 } 1909 1910 buf.append("@" + getLineNumber() + ":" + getColumnNumber()); 1911 if (location != null) { 1912 buf.append(" in "); 1913 buf.append(location); 1914 } else if (reader != null) { 1915 buf.append(" in "); 1916 buf.append(reader.toString()); 1917 } 1918 return buf.toString(); 1919 } 1920 1921 public int getLineNumber() { 1922 int result = bufferStartLine; 1923 for (int i = 0; i < position; i++) { 1924 if (buffer[i] == '\n') { 1925 result++; 1926 } 1927 } 1928 return result + 1; // the first line is '1' 1929 } 1930 1931 public int getColumnNumber() { 1932 int result = bufferStartColumn; 1933 for (int i = 0; i < position; i++) { 1934 if (buffer[i] == '\n') { 1935 result = 0; 1936 } else { 1937 result++; 1938 } 1939 } 1940 return result + 1; // the first column is '1' 1941 } 1942 1943 public boolean isWhitespace() throws XmlPullParserException { 1944 if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) { 1945 throw new XmlPullParserException(ILLEGAL_TYPE, 
this, null); 1946 } 1947 return isWhitespace; 1948 } 1949 1950 public String getText() { 1951 if (type < TEXT || (type == ENTITY_REF && unresolved)) { 1952 return null; 1953 } else if (text == null) { 1954 return ""; 1955 } else { 1956 return text; 1957 } 1958 } 1959 1960 public char[] getTextCharacters(int[] poslen) { 1961 String text = getText(); 1962 if (text == null) { 1963 poslen[0] = -1; 1964 poslen[1] = -1; 1965 return null; 1966 } 1967 char[] result = text.toCharArray(); 1968 poslen[0] = 0; 1969 poslen[1] = result.length; 1970 return result; 1971 } 1972 1973 public String getNamespace() { 1974 return namespace; 1975 } 1976 1977 public String getName() { 1978 return name; 1979 } 1980 1981 public String getPrefix() { 1982 return prefix; 1983 } 1984 1985 public boolean isEmptyElementTag() throws XmlPullParserException { 1986 if (type != START_TAG) { 1987 throw new XmlPullParserException(ILLEGAL_TYPE, this, null); 1988 } 1989 return degenerated; 1990 } 1991 1992 public int getAttributeCount() { 1993 return attributeCount; 1994 } 1995 1996 public String getAttributeType(int index) { 1997 return "CDATA"; 1998 } 1999 2000 public boolean isAttributeDefault(int index) { 2001 return false; 2002 } 2003 2004 public String getAttributeNamespace(int index) { 2005 if (index >= attributeCount) { 2006 throw new IndexOutOfBoundsException(); 2007 } 2008 return attributes[index * 4]; 2009 } 2010 2011 public String getAttributeName(int index) { 2012 if (index >= attributeCount) { 2013 throw new IndexOutOfBoundsException(); 2014 } 2015 return attributes[(index * 4) + 2]; 2016 } 2017 2018 public String getAttributePrefix(int index) { 2019 if (index >= attributeCount) { 2020 throw new IndexOutOfBoundsException(); 2021 } 2022 return attributes[(index * 4) + 1]; 2023 } 2024 2025 public String getAttributeValue(int index) { 2026 if (index >= attributeCount) { 2027 throw new IndexOutOfBoundsException(); 2028 } 2029 return attributes[(index * 4) + 3]; 2030 } 2031 2032 public String 
getAttributeValue(String namespace, String name) { 2033 for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) { 2034 if (attributes[i + 2].equals(name) 2035 && (namespace == null || attributes[i].equals(namespace))) { 2036 return attributes[i + 3]; 2037 } 2038 } 2039 2040 return null; 2041 } 2042 2043 public int getEventType() throws XmlPullParserException { 2044 return type; 2045 } 2046 2047 // utility methods to make XML parsing easier ... 2048 2049 public int nextTag() throws XmlPullParserException, IOException { 2050 next(); 2051 if (type == TEXT && isWhitespace) { 2052 next(); 2053 } 2054 2055 if (type != END_TAG && type != START_TAG) { 2056 throw new XmlPullParserException("unexpected type", this, null); 2057 } 2058 2059 return type; 2060 } 2061 2062 public void require(int type, String namespace, String name) 2063 throws XmlPullParserException, IOException { 2064 if (type != this.type 2065 || (namespace != null && !namespace.equals(getNamespace())) 2066 || (name != null && !name.equals(getName()))) { 2067 throw new XmlPullParserException( 2068 "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null); 2069 } 2070 } 2071 2072 public String nextText() throws XmlPullParserException, IOException { 2073 if (type != START_TAG) { 2074 throw new XmlPullParserException("precondition: START_TAG", this, null); 2075 } 2076 2077 next(); 2078 2079 String result; 2080 if (type == TEXT) { 2081 result = getText(); 2082 next(); 2083 } else { 2084 result = ""; 2085 } 2086 2087 if (type != END_TAG) { 2088 throw new XmlPullParserException("END_TAG expected", this, null); 2089 } 2090 2091 return result; 2092 } 2093 2094 public void setFeature(String feature, boolean value) throws XmlPullParserException { 2095 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 2096 processNsp = value; 2097 } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { 2098 processDocDecl = value; 2099 } else if (FEATURE_RELAXED.equals(feature)) { 2100 relaxed = 
value; 2101 } else { 2102 throw new XmlPullParserException("unsupported feature: " + feature, this, null); 2103 } 2104 } 2105 2106 public void setProperty(String property, Object value) throws XmlPullParserException { 2107 if (property.equals(PROPERTY_LOCATION)) { 2108 location = String.valueOf(value); 2109 } else { 2110 throw new XmlPullParserException("unsupported property: " + property); 2111 } 2112 } 2113 2114 /** 2115 * A chain of buffers containing XML content. Each content source contains 2116 * the parser's primary read buffer or the characters of entities actively 2117 * being parsed. 2118 * 2119 * <p>For example, note the buffers needed to parse this document: 2120 * <pre> {@code 2121 * <!DOCTYPE foo [ 2122 * <!ENTITY baz "ghi"> 2123 * <!ENTITY bar "def &baz; jkl"> 2124 * ]> 2125 * <foo>abc &bar; mno</foo> 2126 * }</pre> 2127 * 2128 * <p>Things get interesting when the bar entity is encountered. At that 2129 * point two buffers are active: 2130 * <ol> 2131 * <li>The value for the bar entity, containing {@code "def &baz; jkl"} 2132 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2133 * </ol> 2134 * <p>The parser will return the characters {@code "def "} from the bar 2135 * entity's buffer, and then it will encounter the baz entity. To handle 2136 * that, three buffers will be active: 2137 * <ol> 2138 * <li>The value for the baz entity, containing {@code "ghi"} 2139 * <li>The remaining value for the bar entity, containing {@code " jkl"} 2140 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2141 * </ol> 2142 * <p>The parser will then return the characters {@code ghi jkl mno} in that 2143 * sequence by reading each buffer in sequence. 
2144 */ 2145 static class ContentSource { 2146 private final ContentSource next; 2147 private final char[] buffer; 2148 private final int position; 2149 private final int limit; 2150 ContentSource(ContentSource next, char[] buffer, int position, int limit) { 2151 this.next = next; 2152 this.buffer = buffer; 2153 this.position = position; 2154 this.limit = limit; 2155 } 2156 } 2157 2158 /** 2159 * Prepends the characters of {@code newBuffer} to be read before the 2160 * current buffer. 2161 */ 2162 private void pushContentSource(char[] newBuffer) { 2163 nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); 2164 buffer = newBuffer; 2165 position = 0; 2166 limit = newBuffer.length; 2167 } 2168 2169 /** 2170 * Replaces the current exhausted buffer with the next buffer in the chain. 2171 */ 2172 private void popContentSource() { 2173 buffer = nextContentSource.buffer; 2174 position = nextContentSource.position; 2175 limit = nextContentSource.limit; 2176 nextContentSource = nextContentSource.next; 2177 } 2178 } 2179