1 /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. */ 20 21 // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode) 22 23 package org.kxml2.io; 24 25 import java.io.Closeable; 26 import java.io.IOException; 27 import java.io.InputStream; 28 import java.io.InputStreamReader; 29 import java.io.Reader; 30 import java.util.HashMap; 31 import java.util.Map; 32 import libcore.internal.StringPool; 33 import org.xmlpull.v1.XmlPullParser; 34 import org.xmlpull.v1.XmlPullParserException; 35 36 /** 37 * An XML pull parser with limited support for parsing internal DTDs. 
*/
public class KXmlParser implements XmlPullParser, Closeable {

    private static final String PROPERTY_XMLDECL_VERSION
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    private static final String PROPERTY_XMLDECL_STANDALONE
            = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
    private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed";

    // Replacement text for the entity names lt, gt, amp, apos and quot.
    private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>();
    static {
        DEFAULT_ENTITIES.put("lt", "<");
        DEFAULT_ENTITIES.put("gt", ">");
        DEFAULT_ENTITIES.put("amp", "&");
        DEFAULT_ENTITIES.put("apos", "'");
        DEFAULT_ENTITIES.put("quot", "\"");
    }

    // Token types that peekType() reports for DTD constructs; readInternalSubset()
    // dispatches on them. PARAMETER_ENTITY_REF is only reported inside a declaration.
    private static final int ELEMENTDECL = 11;
    private static final int ENTITYDECL = 12;
    private static final int ATTLISTDECL = 13;
    private static final int NOTATIONDECL = 14;
    private static final int PARAMETER_ENTITY_REF = 15;

    // Fixed character sequences handed to read(char[]) and readUntil(char[], boolean).
    private static final char[] START_COMMENT = { '<', '!', '-', '-' };
    private static final char[] END_COMMENT = { '-', '-', '>' };
    private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' };
    private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' };
    private static final char[] END_CDATA = { ']', ']', '>' };
    private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' };
    private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' };
    private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' };
    private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' };
    private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' };
    private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' };
    private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' };
    private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' };
    private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' };
    private static final char[] ANY = new char[]{ 'A', 'N', 'Y' };
    private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' };
    private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' };
    private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' };
    private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' };
    private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' };

    // Message passed to checkRelaxed() when input ends in the middle of a token.
    static final private String UNEXPECTED_EOF = "Unexpected EOF";
    static final private String ILLEGAL_TYPE = "Wrong event type";
    // Internal token type reported by peekType() for "<?xml "; next() consumes it
    // through readXmlDeclaration() before looking at the following token.
    static final private int XML_DECLARATION = 998;

    // general
    private String location;

    // Set by readXmlDeclaration() from the version and standalone pseudo-attributes.
    private String version;
    private Boolean standalone;
    // Set by readDoctype(): the name after <!DOCTYPE and its external identifiers.
    private String rootElementName;
    private String systemId;
    private String publicId;

    /**
     * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines
     * entity values and default attribute values. These values are parsed at
     * inclusion time and may contain both tags and entity references.
     *
     * <p>If this is false, the user must {@link #defineEntityReplacementText
     * define entity values manually}. Such entity values are literal strings
     * and will not be parsed. There is no API to define default attributes
     * manually.
     */
    private boolean processDocDecl;
    // When true, parseStartTag() resolves prefixes and namespaces via adjustNsp().
    private boolean processNsp;
    // When true, checkRelaxed() records recoverable errors instead of throwing.
    private boolean relaxed;
    private boolean keepNamespaceAttributes;

    /**
     * If non-null, the contents of the read buffer must be copied into this
     * string builder before the read buffer is overwritten. This is used to
     * capture the raw DTD text while parsing the DTD.
     */
    private StringBuilder bufferCapture;

    /**
     * Entities defined in or for this document. This map is created lazily.
     */
    private Map<String, char[]> documentEntities;

    /**
     * Default attributes in this document. The outer map's key is the element
     * name; the inner map's key is the attribute name. Both keys should be
     * without namespace adjustments. This map is created lazily.
     */
    private Map<String, Map<String, String>> defaultAttributes;


    // Number of currently open elements; incremented by parseStartTag() and
    // decremented by next() on the call that follows an END_TAG.
    private int depth;
    // Four slots per open element, written by parseStartTag():
    //   sp + 0 = namespace, sp + 1 = prefix, sp + 2 = name after namespace
    //   processing, sp + 3 = name exactly as read from the tag.
    private String[] elementStack = new String[16];
    // Namespace declarations as (prefix, URI) pairs; the prefix slot is null for a
    // plain xmlns="..." declaration. Filled by adjustNsp().
    private String[] nspStack = new String[8];
    // nspCounts[d] = number of pairs in nspStack that are in effect at depth d.
    private int[] nspCounts = new int[4];

    // source

    private Reader reader;
    private String encoding;
    private ContentSource nextContentSource;
    // Read buffer; 'position' is the index of the next unread character and the
    // parser calls fillBuffer() whenever it needs characters at or past 'limit'.
    private char[] buffer = new char[8192];
    private int position = 0;
    private int limit = 0;

    /*
     * Track the number of newlines and columns preceding the current buffer. To
     * compute the line and column of a position in the buffer, compute the line
     * and column in the buffer and add the preceding values.
     */
    private int bufferStartLine;
    private int bufferStartColumn;

    // the current token

    private int type;
    private boolean isWhitespace;
    private String namespace;
    private String prefix;
    private String name;
    private String text;

    // Set by parseStartTag() for an empty-element tag ("/>"); the following call
    // to next() then reports END_TAG without reading any input.
    private boolean degenerated;
    // Reset to -1 by next() for every token; parseStartTag() counts from 0.
    private int attributeCount;

    // true iff. we've encountered the START_TAG of an XML element at depth == 0;
    private boolean parsedTopLevelStartTag;

    /*
     * The current element's attributes arranged in groups of 4:
     *    i + 0 = attribute namespace URI
     *    i + 1 = attribute namespace prefix
     *    i + 2 = attribute qualified name (may contain ":", as in "html:h1")
     *    i + 3 = attribute value
     */
    private String[] attributes = new String[16];

    // First problem recorded by checkRelaxed() in relaxed mode; nextToken()
    // reports it as the text of a COMMENT token and then clears it.
    private String error;

    private boolean unresolved;

    public final StringPool stringPool = new StringPool();

    /**
     * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"}
     * in pulled elements. Most applications will only be interested in the effective namespaces of
     * their elements, so these attributes aren't useful. But for structure preserving wrappers like
     * DOM, it is necessary to keep the namespace data around.
*/
    public void keepNamespaceAttributes() {
        this.keepNamespaceAttributes = true;
    }

    /**
     * Applies namespace processing to the start tag that was just read.
     *
     * <p>Every {@code xmlns} / {@code xmlns:foo} attribute is pushed onto
     * {@code nspStack} as a (prefix, URI) pair. Unless
     * {@link #keepNamespaceAttributes()} was called, those attributes are also
     * removed from the attribute array. Afterwards the prefixes of the
     * remaining attributes and of the element name are split off and resolved
     * with {@code getNamespace}.
     *
     * @return true if any attribute needed prefix resolution, i.e. a prefixed
     *     attribute was seen or a namespace attribute was retained.
     * @throws XmlPullParserException for an empty prefixed namespace, an
     *     illegal tag name or an undefined element prefix, unless the parser
     *     is relaxed.
     */
    private boolean adjustNsp() throws XmlPullParserException {
        boolean any = false;

        // Pass 1: collect namespace declarations. attributeCount << 2 is the
        // number of used slots in the 4-slots-per-attribute array.
        for (int i = 0; i < attributeCount << 2; i += 4) {
            String attrName = attributes[i + 2];
            int cut = attrName.indexOf(':');
            String prefix;

            if (cut != -1) {
                prefix = attrName.substring(0, cut);
                attrName = attrName.substring(cut + 1);
            } else if (attrName.equals("xmlns")) {
                // Default namespace declaration: stored with a null prefix.
                prefix = attrName;
                attrName = null;
            } else {
                continue;
            }

            if (!prefix.equals("xmlns")) {
                any = true;
            } else {
                // Claim the next (prefix, URI) pair for the current depth.
                int j = (nspCounts[depth]++) << 1;

                nspStack = ensureCapacity(nspStack, j + 2);
                nspStack[j] = attrName;
                nspStack[j + 1] = attributes[i + 3];

                if (attrName != null && attributes[i + 3].isEmpty()) {
                    checkRelaxed("illegal empty namespace");
                }

                if (keepNamespaceAttributes) {
                    // explicitly set the namespace for unprefixed attributes
                    // such as xmlns="http://foo"
                    attributes[i] = "http://www.w3.org/2000/xmlns/";
                    any = true;
                } else {
                    // Drop this attribute by shifting the following ones down.
                    System.arraycopy(
                            attributes,
                            i + 4,
                            attributes,
                            i,
                            ((--attributeCount) << 2) - i);

                    // Revisit the same index, which now holds the next attribute.
                    i -= 4;
                }
            }
        }

        // Pass 2: resolve the prefixes of the remaining attributes.
        if (any) {
            for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) {

                String attrName = attributes[i + 2];
                int cut = attrName.indexOf(':');

                if (cut == 0 && !relaxed) {
                    throw new RuntimeException(
                            "illegal attribute name: " + attrName + " at " + this);
                } else if (cut != -1) {
                    String attrPrefix = attrName.substring(0, cut);

                    attrName = attrName.substring(cut + 1);

                    String attrNs = getNamespace(attrPrefix);

                    if (attrNs == null && !relaxed) {
                        throw new RuntimeException(
                                "Undefined Prefix: " + attrPrefix + " in " + this);
                    }

                    attributes[i] = attrNs;
                    attributes[i + 1] = attrPrefix;
                    attributes[i + 2] = attrName;
                }
            }
        }

        // Finally split and resolve the element name itself.
        int cut = name.indexOf(':');

        if (cut == 0) {
            checkRelaxed("illegal tag name: " + name);
        }

        if (cut != -1) {
            prefix = name.substring(0, cut);
            name = name.substring(cut + 1);
        }

        this.namespace = getNamespace(prefix);

        if (this.namespace == null) {
            if (prefix != null) {
                checkRelaxed("undefined prefix: " + prefix);
            }
            this.namespace = NO_NAMESPACE;
        }

        return any;
    }

    /**
     * Returns an array with room for at least {@code required} elements.
     *
     * @return {@code arr} itself if it is already long enough; otherwise a new
     *     array of length {@code required + 16} holding a copy of {@code arr}.
     */
    private String[] ensureCapacity(String[] arr, int required) {
        if (arr.length >= required) {
            return arr;
        }
        String[] bigger = new String[required + 16];
        System.arraycopy(arr, 0, bigger, 0, arr.length);
        return bigger;
    }

    /**
     * Reports a recoverable problem. In strict mode this throws; in relaxed
     * mode the first message is remembered in {@code error} and parsing
     * continues.
     *
     * @throws XmlPullParserException if the parser is not relaxed.
     */
    private void checkRelaxed(String errorMessage) throws XmlPullParserException {
        if (!relaxed) {
            throw new XmlPullParserException(errorMessage, this, null);
        }
        if (error == null) {
            error = "Error: " + errorMessage;
        }
    }

    // Delegates to next(false): adjacent text is merged and comments,
    // processing instructions and the doctype are skipped.
    public int next() throws XmlPullParserException, IOException {
        return next(false);
    }

    // Delegates to next(true): every token is reported individually.
    public int nextToken() throws XmlPullParserException, IOException {
        return next(true);
    }

    /**
     * Advances to the next event and returns its type.
     *
     * @param justOneToken true to return after every single token (the
     *     {@link #nextToken()} behavior); false to concatenate adjacent text,
     *     entity references and CDATA sections into one TEXT event and to skip
     *     comments, processing instructions and the doctype declaration.
     * @throws XmlPullParserException if no input has been set, or if a token
     *     appears where it is not permitted.
     */
    private int next(boolean justOneToken) throws IOException, XmlPullParserException {
        if (reader == null) {
            throw new XmlPullParserException("setInput() must be called first.", this, null);
        }

        // The depth of an END_TAG stays visible until the next call.
        if (type == END_TAG) {
            depth--;
        }

        // degenerated needs to be handled before error because of possible
        // processor expectations(!)

        if (degenerated) {
            // Synthesize the END_TAG of an empty-element tag.
            degenerated = false;
            type = END_TAG;
            return type;
        }

        if (error != null) {
            if (justOneToken) {
                // Surface the recorded relaxed-mode error as a COMMENT token.
                text = error;
                type = COMMENT;
                error = null;
                return type;
            } else {
                error = null;
            }
        }

        type = peekType(false);

        if (type == XML_DECLARATION) {
            readXmlDeclaration();
            type = peekType(false);
        }

        // Reset the per-token state.
        text = null;
        isWhitespace = true;
        prefix = null;
        name = null;
        namespace = null;
        attributeCount = -1;
        boolean throwOnResolveFailure = !justOneToken;

        while (true) {
            switch (type) {

            /*
             * Return immediately after encountering a start tag, end tag, or
             * the end of the document.
             */
            case START_TAG:
                parseStartTag(false, throwOnResolveFailure);
                return type;
            case END_TAG:
                readEndTag();
                return type;
            case END_DOCUMENT:
                return type;

            /*
             * Return after any text token when we're looking for a single
             * token. Otherwise concatenate all text between tags.
             */
            case ENTITY_REF:
                if (justOneToken) {
                    StringBuilder entityTextBuilder = new StringBuilder();
                    readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT);
                    text = entityTextBuilder.toString();
                    break;
                }
                // fall-through
            case TEXT:
                text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT);
                if (depth == 0 && isWhitespace) {
                    type = IGNORABLE_WHITESPACE;
                }
                break;
            case CDSECT:
                read(START_CDATA);
                text = readUntil(END_CDATA, true);
                break;

            /*
             * Comments, processing instructions and declarations are returned
             * when we're looking for a single token. Otherwise they're skipped.
             */
            case COMMENT:
                String commentText = readComment(justOneToken);
                if (justOneToken) {
                    text = commentText;
                }
                break;
            case PROCESSING_INSTRUCTION:
                read(START_PROCESSING_INSTRUCTION);
                String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken);
                if (justOneToken) {
                    text = processingInstruction;
                }
                break;
            case DOCDECL:
                readDoctype(justOneToken);
                // A doctype is rejected once the root element has been opened.
                if (parsedTopLevelStartTag) {
                    throw new XmlPullParserException("Unexpected token", this, null);
                }
                break;

            default:
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            // Character data is not allowed outside of the root element.
            // Whitespace-only text at depth 0 was retyped above and passes.
            if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) {
                throw new XmlPullParserException("Unexpected token", this, null);
            }

            if (justOneToken) {
                return type;
            }

            if (type == IGNORABLE_WHITESPACE) {
                text = null;
            }

            /*
             * We've read all that we can of a non-empty text block. Always
             * report this as text, even if it was a CDATA block or entity
             * reference.
             */
            int peek = peekType(false);
            if (text != null && !text.isEmpty() && peek < TEXT) {
                type = TEXT;
                return type;
            }

            type = peek;
        }
    }

    /**
     * Reads text until the specified delimiter is encountered. Consumes the
     * text and the delimiter.
     *
     * @param returnText true to return the read text excluding the delimiter;
     *     false to return null.
464 */ 465 private String readUntil(char[] delimiter, boolean returnText) 466 throws IOException, XmlPullParserException { 467 int start = position; 468 StringBuilder result = null; 469 470 if (returnText && text != null) { 471 result = new StringBuilder(); 472 result.append(text); 473 } 474 475 search: 476 while (true) { 477 if (position + delimiter.length > limit) { 478 if (start < position && returnText) { 479 if (result == null) { 480 result = new StringBuilder(); 481 } 482 result.append(buffer, start, position - start); 483 } 484 if (!fillBuffer(delimiter.length)) { 485 checkRelaxed(UNEXPECTED_EOF); 486 type = COMMENT; 487 return null; 488 } 489 start = position; 490 } 491 492 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 493 // when the VM has better method inlining 494 for (int i = 0; i < delimiter.length; i++) { 495 if (buffer[position + i] != delimiter[i]) { 496 position++; 497 continue search; 498 } 499 } 500 501 break; 502 } 503 504 int end = position; 505 position += delimiter.length; 506 507 if (!returnText) { 508 return null; 509 } else if (result == null) { 510 return stringPool.get(buffer, start, end - start); 511 } else { 512 result.append(buffer, start, end - start); 513 return result.toString(); 514 } 515 } 516 517 /** 518 * Returns true if an XML declaration was read. 
519 */ 520 private void readXmlDeclaration() throws IOException, XmlPullParserException { 521 if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) { 522 checkRelaxed("processing instructions must not start with xml"); 523 } 524 525 read(START_PROCESSING_INSTRUCTION); 526 parseStartTag(true, true); 527 528 if (attributeCount < 1 || !"version".equals(attributes[2])) { 529 checkRelaxed("version expected"); 530 } 531 532 version = attributes[3]; 533 534 int pos = 1; 535 536 if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) { 537 encoding = attributes[3 + 4]; 538 pos++; 539 } 540 541 if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) { 542 String st = attributes[3 + 4 * pos]; 543 if ("yes".equals(st)) { 544 standalone = Boolean.TRUE; 545 } else if ("no".equals(st)) { 546 standalone = Boolean.FALSE; 547 } else { 548 checkRelaxed("illegal standalone value: " + st); 549 } 550 pos++; 551 } 552 553 if (pos != attributeCount) { 554 checkRelaxed("unexpected attributes in XML declaration"); 555 } 556 557 isWhitespace = true; 558 text = null; 559 } 560 561 private String readComment(boolean returnText) throws IOException, XmlPullParserException { 562 read(START_COMMENT); 563 564 if (relaxed) { 565 return readUntil(END_COMMENT, returnText); 566 } 567 568 String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText); 569 if (peekCharacter() != '>') { 570 throw new XmlPullParserException("Comments may not contain --", this, null); 571 } 572 position++; 573 return commentText; 574 } 575 576 /** 577 * Read the document's DTD. Although this parser is non-validating, the DTD 578 * must be parsed to capture entity values and default attribute values. 
579 */ 580 private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException { 581 read(START_DOCTYPE); 582 583 int startPosition = -1; 584 if (saveDtdText) { 585 bufferCapture = new StringBuilder(); 586 startPosition = position; 587 } 588 try { 589 skip(); 590 rootElementName = readName(); 591 readExternalId(true, true); 592 skip(); 593 if (peekCharacter() == '[') { 594 readInternalSubset(); 595 } 596 skip(); 597 } finally { 598 if (saveDtdText) { 599 bufferCapture.append(buffer, 0, position); 600 bufferCapture.delete(0, startPosition); 601 text = bufferCapture.toString(); 602 bufferCapture = null; 603 } 604 } 605 606 read('>'); 607 } 608 609 /** 610 * Reads an external ID of one of these two forms: 611 * SYSTEM "quoted system name" 612 * PUBLIC "quoted public id" "quoted system name" 613 * 614 * If the system name is not required, this also supports lone public IDs of 615 * this form: 616 * PUBLIC "quoted public id" 617 * 618 * Returns true if any ID was read. 619 */ 620 private boolean readExternalId(boolean requireSystemName, boolean assignFields) 621 throws IOException, XmlPullParserException { 622 skip(); 623 int c = peekCharacter(); 624 625 if (c == 'S') { 626 read(SYSTEM); 627 } else if (c == 'P') { 628 read(PUBLIC); 629 skip(); 630 if (assignFields) { 631 publicId = readQuotedId(true); 632 } else { 633 readQuotedId(false); 634 } 635 } else { 636 return false; 637 } 638 639 skip(); 640 641 if (!requireSystemName) { 642 int delimiter = peekCharacter(); 643 if (delimiter != '"' && delimiter != '\'') { 644 return true; // no system name! 645 } 646 } 647 648 if (assignFields) { 649 systemId = readQuotedId(true); 650 } else { 651 readQuotedId(false); 652 } 653 return true; 654 } 655 656 private static final char[] SINGLE_QUOTE = new char[] { '\'' }; 657 private static final char[] DOUBLE_QUOTE = new char[] { '"' }; 658 659 /** 660 * Reads a quoted string, performing no entity escaping of the contents. 
661 */ 662 private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException { 663 int quote = peekCharacter(); 664 char[] delimiter; 665 if (quote == '"') { 666 delimiter = DOUBLE_QUOTE; 667 } else if (quote == '\'') { 668 delimiter = SINGLE_QUOTE; 669 } else { 670 throw new XmlPullParserException("Expected a quoted string", this, null); 671 } 672 position++; 673 return readUntil(delimiter, returnText); 674 } 675 676 private void readInternalSubset() throws IOException, XmlPullParserException { 677 read('['); 678 679 while (true) { 680 skip(); 681 if (peekCharacter() == ']') { 682 position++; 683 return; 684 } 685 686 int declarationType = peekType(true); 687 switch (declarationType) { 688 case ELEMENTDECL: 689 readElementDeclaration(); 690 break; 691 692 case ATTLISTDECL: 693 readAttributeListDeclaration(); 694 break; 695 696 case ENTITYDECL: 697 readEntityDeclaration(); 698 break; 699 700 case NOTATIONDECL: 701 readNotationDeclaration(); 702 break; 703 704 case PROCESSING_INSTRUCTION: 705 read(START_PROCESSING_INSTRUCTION); 706 readUntil(END_PROCESSING_INSTRUCTION, false); 707 break; 708 709 case COMMENT: 710 readComment(false); 711 break; 712 713 case PARAMETER_ENTITY_REF: 714 throw new XmlPullParserException( 715 "Parameter entity references are not supported", this, null); 716 717 default: 718 throw new XmlPullParserException("Unexpected token", this, null); 719 } 720 } 721 } 722 723 /** 724 * Read an element declaration. This contains a name and a content spec. 725 * <!ELEMENT foo EMPTY > 726 * <!ELEMENT foo (bar?,(baz|quux)) > 727 * <!ELEMENT foo (#PCDATA|bar)* > 728 */ 729 private void readElementDeclaration() throws IOException, XmlPullParserException { 730 read(START_ELEMENT); 731 skip(); 732 readName(); 733 readContentSpec(); 734 skip(); 735 read('>'); 736 } 737 738 /** 739 * Read an element content spec. This is a regular expression-like pattern 740 * of names or other content specs. 
The following operators are supported: 741 * sequence: (a,b,c) 742 * choice: (a|b|c) 743 * optional: a? 744 * one or more: a+ 745 * any number: a* 746 * 747 * The special name '#PCDATA' is permitted but only if it is the first 748 * element of the first group: 749 * (#PCDATA|a|b) 750 * 751 * The top-level element must be either a choice, a sequence, or one of the 752 * special names EMPTY and ANY. 753 */ 754 private void readContentSpec() throws IOException, XmlPullParserException { 755 // this implementation is very lenient; it scans for balanced parens only 756 skip(); 757 int c = peekCharacter(); 758 if (c == '(') { 759 int depth = 0; 760 do { 761 if (c == '(') { 762 depth++; 763 } else if (c == ')') { 764 depth--; 765 } else if (c == -1) { 766 throw new XmlPullParserException( 767 "Unterminated element content spec", this, null); 768 } 769 position++; 770 c = peekCharacter(); 771 } while (depth > 0); 772 773 if (c == '*' || c == '?' || c == '+') { 774 position++; 775 } 776 } else if (c == EMPTY[0]) { 777 read(EMPTY); 778 } else if (c == ANY[0]) { 779 read(ANY); 780 } else { 781 throw new XmlPullParserException("Expected element content spec", this, null); 782 } 783 } 784 785 /** 786 * Reads an attribute list declaration such as the following: 787 * <!ATTLIST foo 788 * bar CDATA #IMPLIED 789 * quux (a|b|c) "c" 790 * baz NOTATION (a|b|c) #FIXED "c"> 791 * 792 * Each attribute has a name, type and default. 793 * 794 * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY, 795 * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)" 796 * or NOTATION followed by an enumerated type. 797 * 798 * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or 799 * #FIXED with a quoted value. 
800 */ 801 private void readAttributeListDeclaration() throws IOException, XmlPullParserException { 802 read(START_ATTLIST); 803 skip(); 804 String elementName = readName(); 805 806 while (true) { 807 skip(); 808 int c = peekCharacter(); 809 if (c == '>') { 810 position++; 811 return; 812 } 813 814 // attribute name 815 String attributeName = readName(); 816 817 // attribute type 818 skip(); 819 if (position + 1 >= limit && !fillBuffer(2)) { 820 throw new XmlPullParserException("Malformed attribute list", this, null); 821 } 822 if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) { 823 read(NOTATION); 824 skip(); 825 } 826 c = peekCharacter(); 827 if (c == '(') { 828 position++; 829 while (true) { 830 skip(); 831 readName(); 832 skip(); 833 c = peekCharacter(); 834 if (c == ')') { 835 position++; 836 break; 837 } else if (c == '|') { 838 position++; 839 } else { 840 throw new XmlPullParserException("Malformed attribute type", this, null); 841 } 842 } 843 } else { 844 readName(); 845 } 846 847 // default value 848 skip(); 849 c = peekCharacter(); 850 if (c == '#') { 851 position++; 852 c = peekCharacter(); 853 if (c == 'R') { 854 read(REQUIRED); 855 } else if (c == 'I') { 856 read(IMPLIED); 857 } else if (c == 'F') { 858 read(FIXED); 859 } else { 860 throw new XmlPullParserException("Malformed attribute type", this, null); 861 } 862 skip(); 863 c = peekCharacter(); 864 } 865 if (c == '"' || c == '\'') { 866 position++; 867 // TODO: does this do escaping correctly? 
868 String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE); 869 if (peekCharacter() == c) { 870 position++; 871 } 872 defineAttributeDefault(elementName, attributeName, value); 873 } 874 } 875 } 876 877 private void defineAttributeDefault(String elementName, String attributeName, String value) { 878 if (defaultAttributes == null) { 879 defaultAttributes = new HashMap<String, Map<String, String>>(); 880 } 881 Map<String, String> elementAttributes = defaultAttributes.get(elementName); 882 if (elementAttributes == null) { 883 elementAttributes = new HashMap<String, String>(); 884 defaultAttributes.put(elementName, elementAttributes); 885 } 886 elementAttributes.put(attributeName, value); 887 } 888 889 /** 890 * Read an entity declaration. The value of internal entities are inline: 891 * <!ENTITY foo "bar"> 892 * 893 * The values of external entities must be retrieved by URL or path: 894 * <!ENTITY foo SYSTEM "http://host/file"> 895 * <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file"> 896 * <!ENTITY foo SYSTEM "../file.png" NDATA png> 897 * 898 * Entities may be general or parameterized. Parameterized entities are 899 * marked by a percent sign. Such entities may only be used in the DTD: 900 * <!ENTITY % foo "bar"> 901 */ 902 private void readEntityDeclaration() throws IOException, XmlPullParserException { 903 read(START_ENTITY); 904 boolean generalEntity = true; 905 906 skip(); 907 if (peekCharacter() == '%') { 908 generalEntity = false; 909 position++; 910 skip(); 911 } 912 913 String name = readName(); 914 915 skip(); 916 int quote = peekCharacter(); 917 String entityValue; 918 if (quote == '"' || quote == '\'') { 919 position++; 920 entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION); 921 if (peekCharacter() == quote) { 922 position++; 923 } 924 } else if (readExternalId(true, false)) { 925 /* 926 * Map external entities to the empty string. 
This is dishonest, 927 * but it's consistent with Android's Expat pull parser. 928 */ 929 entityValue = ""; 930 skip(); 931 if (peekCharacter() == NDATA[0]) { 932 read(NDATA); 933 skip(); 934 readName(); 935 } 936 } else { 937 throw new XmlPullParserException("Expected entity value or external ID", this, null); 938 } 939 940 if (generalEntity && processDocDecl) { 941 if (documentEntities == null) { 942 documentEntities = new HashMap<String, char[]>(); 943 } 944 documentEntities.put(name, entityValue.toCharArray()); 945 } 946 947 skip(); 948 read('>'); 949 } 950 951 private void readNotationDeclaration() throws IOException, XmlPullParserException { 952 read(START_NOTATION); 953 skip(); 954 readName(); 955 if (!readExternalId(false, false)) { 956 throw new XmlPullParserException( 957 "Expected external ID or public ID for notation", this, null); 958 } 959 skip(); 960 read('>'); 961 } 962 963 private void readEndTag() throws IOException, XmlPullParserException { 964 read('<'); 965 read('/'); 966 name = readName(); // TODO: pass the expected name in as a hint? 967 skip(); 968 read('>'); 969 970 int sp = (depth - 1) * 4; 971 972 if (depth == 0) { 973 checkRelaxed("read end tag " + name + " with no tags open"); 974 type = COMMENT; 975 return; 976 } 977 978 if (name.equals(elementStack[sp + 3])) { 979 namespace = elementStack[sp]; 980 prefix = elementStack[sp + 1]; 981 name = elementStack[sp + 2]; 982 } else if (!relaxed) { 983 throw new XmlPullParserException( 984 "expected: /" + elementStack[sp + 3] + " read: " + name, this, null); 985 } 986 } 987 988 /** 989 * Returns the type of the next token. 
*/
    private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException {
        // Nothing left to read: the document has ended.
        if (position >= limit && !fillBuffer(1)) {
            return END_DOCUMENT;
        }

        switch (buffer[position]) {
        case '&':
            return ENTITY_REF; // &
        case '<':
            // Up to three characters after '<' are inspected below.
            if (position + 3 >= limit && !fillBuffer(4)) {
                throw new XmlPullParserException("Dangling <", this, null);
            }

            switch (buffer[position + 1]) {
            case '/':
                return END_TAG; // </
            case '?':
                // we're looking for "<?xml " with case insensitivity
                if ((position + 5 < limit || fillBuffer(6))
                        && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X')
                        && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M')
                        && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L')
                        && (buffer[position + 5] == ' ')) {
                    return XML_DECLARATION; // <?xml
                } else {
                    return PROCESSING_INSTRUCTION; // <?
                }
            case '!':
                switch (buffer[position + 2]) {
                case 'D':
                    return DOCDECL; // <!D
                case '[':
                    return CDSECT; // <![
                case '-':
                    return COMMENT; // <!-
                case 'E':
                    switch (buffer[position + 3]) {
                    case 'L':
                        return ELEMENTDECL; // <!EL
                    case 'N':
                        return ENTITYDECL; // <!EN
                    }
                    // "<!E" followed by anything else falls through to the throw below
                    break;
                case 'A':
                    return ATTLISTDECL; // <!A
                case 'N':
                    return NOTATIONDECL; // <!N
                }
                throw new XmlPullParserException("Unexpected <!", this, null);
            default:
                return START_TAG; // <
            }
        case '%':
            // '%' only starts a parameter entity reference inside a declaration
            return inDeclaration ? PARAMETER_ENTITY_REF : TEXT;
        default:
            return TEXT;
        }
    }

    /**
     * Sets name and attributes
     *
     * <p>Reads a start tag or, when {@code xmldecl} is true, the
     * pseudo-attributes of an XML declaration. For a real start tag this also
     * pushes the element onto the element stack, runs namespace processing if
     * it is enabled, and appends default attributes taken from the DTD.
     *
     * @param xmldecl true when reading {@code <?xml ... ?>}; the leading
     *     {@code <} is then not consumed here and the method returns as soon
     *     as {@code ?>} has been read, without touching the element stack.
     * @param throwOnResolveFailure passed through to {@code readValue} for
     *     attribute values.
     */
    private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure)
            throws IOException, XmlPullParserException {
        if (!xmldecl) {
            read('<');
        }
        name = readName();
        attributeCount = 0;

        while (true) {
            skip();

            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            int c = buffer[position];

            if (xmldecl) {
                if (c == '?') {
                    position++;
                    read('>');
                    return;
                }
            } else {
                if (c == '/') {
                    // Empty-element tag: next() will synthesize the END_TAG.
                    degenerated = true;
                    position++;
                    skip();
                    read('>');
                    break;
                } else if (c == '>') {
                    position++;
                    break;
                }
            }

            String attrName = readName();

            // Claim the next group of four attribute slots.
            int i = (attributeCount++) * 4;
            attributes = ensureCapacity(attributes, i + 4);
            attributes[i] = "";
            attributes[i + 1] = null;
            attributes[i + 2] = attrName;

            skip();
            if (position >= limit && !fillBuffer(1)) {
                checkRelaxed(UNEXPECTED_EOF);
                return;
            }

            if (buffer[position] == '=') {
                position++;

                skip();
                if (position >= limit && !fillBuffer(1)) {
                    checkRelaxed(UNEXPECTED_EOF);
                    return;
                }
                char delimiter = buffer[position];

                if (delimiter == '\'' || delimiter == '"') {
                    position++;
                } else if (relaxed) {
                    // Unquoted value: a space is passed as the delimiter.
                    delimiter = ' ';
                } else {
                    throw new XmlPullParserException("attr value delimiter missing!", this, null);
                }

                attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure,
                        ValueContext.ATTRIBUTE);

                if (delimiter != ' ' && peekCharacter() == delimiter) {
                    position++; // end quote
                }
            } else if (relaxed) {
                // Attribute without a value: its name doubles as the value.
                attributes[i + 3] = attrName;
            } else {
                checkRelaxed("Attr.value missing f. " + attrName);
                attributes[i + 3] = attrName;
            }
        }

        // Push the element: four stack slots per depth level.
        int sp = depth++ * 4;
        if (depth == 1) {
            parsedTopLevelStartTag = true;
        }
        elementStack = ensureCapacity(elementStack, sp + 4);
        // Slot 3 keeps the name as written; readEndTag() compares against it.
        elementStack[sp + 3] = name;

        if (depth >= nspCounts.length) {
            int[] bigger = new int[depth + 4];
            System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length);
            nspCounts = bigger;
        }

        // Start with the parent's count of namespace declarations.
        nspCounts[depth] = nspCounts[depth - 1];

        if (processNsp) {
            adjustNsp();
        } else {
            namespace = "";
        }

        // For consistency with Expat, add default attributes after fixing namespaces.
        if (defaultAttributes != null) {
            Map<String, String> elementDefaultAttributes = defaultAttributes.get(name);
            if (elementDefaultAttributes != null) {
                for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) {
                    if (getAttributeValue(null, entry.getKey()) != null) {
                        continue; // an explicit value overrides the default
                    }

                    int i = (attributeCount++) * 4;
                    attributes = ensureCapacity(attributes, i + 4);
                    attributes[i] = "";
                    attributes[i + 1] = null;
                    attributes[i + 2] = entry.getKey();
                    attributes[i + 3] = entry.getValue();
                }
            }
        }

        // Slots 0-2 hold the values produced by namespace processing.
        elementStack[sp] = namespace;
        elementStack[sp + 1] = prefix;
        elementStack[sp + 2] = name;
    }

    /**
     * Reads an entity reference from the buffer, resolves it, and writes the
     * resolved entity to {@code out}. If the entity cannot be read or resolved,
     * {@code out} will contain the partial entity reference.
1185 */ 1186 private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure, 1187 ValueContext valueContext) throws IOException, XmlPullParserException { 1188 int start = out.length(); 1189 1190 if (buffer[position++] != '&') { 1191 throw new AssertionError(); 1192 } 1193 1194 out.append('&'); 1195 1196 while (true) { 1197 int c = peekCharacter(); 1198 1199 if (c == ';') { 1200 out.append(';'); 1201 position++; 1202 break; 1203 1204 } else if (c >= 128 1205 || (c >= '0' && c <= '9') 1206 || (c >= 'a' && c <= 'z') 1207 || (c >= 'A' && c <= 'Z') 1208 || c == '_' 1209 || c == '-' 1210 || c == '#') { 1211 position++; 1212 out.append((char) c); 1213 1214 } else if (relaxed) { 1215 // intentionally leave the partial reference in 'out' 1216 return; 1217 1218 } else { 1219 throw new XmlPullParserException("unterminated entity ref", this, null); 1220 } 1221 } 1222 1223 String code = out.substring(start + 1, out.length() - 1); 1224 1225 if (isEntityToken) { 1226 name = code; 1227 } 1228 1229 if (code.startsWith("#")) { 1230 try { 1231 int c = code.startsWith("#x") 1232 ? 
Integer.parseInt(code.substring(2), 16) 1233 : Integer.parseInt(code.substring(1)); 1234 out.delete(start, out.length()); 1235 out.appendCodePoint(c); 1236 unresolved = false; 1237 return; 1238 } catch (NumberFormatException notANumber) { 1239 throw new XmlPullParserException("Invalid character reference: &" + code); 1240 } catch (IllegalArgumentException invalidCodePoint) { 1241 throw new XmlPullParserException("Invalid character reference: &" + code); 1242 } 1243 } 1244 1245 if (valueContext == ValueContext.ENTITY_DECLARATION) { 1246 // keep the unresolved &code; in the text to resolve later 1247 return; 1248 } 1249 1250 String defaultEntity = DEFAULT_ENTITIES.get(code); 1251 if (defaultEntity != null) { 1252 out.delete(start, out.length()); 1253 unresolved = false; 1254 out.append(defaultEntity); 1255 return; 1256 } 1257 1258 char[] resolved; 1259 if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { 1260 out.delete(start, out.length()); 1261 unresolved = false; 1262 if (processDocDecl) { 1263 pushContentSource(resolved); // parse the entity as XML 1264 } else { 1265 out.append(resolved); // include the entity value as text 1266 } 1267 return; 1268 } 1269 1270 /* 1271 * The parser skipped an external DTD, and now we've encountered an 1272 * unknown entity that could have been declared there. Map it to the 1273 * empty string. This is dishonest, but it's consistent with Android's 1274 * old ExpatPullParser. 1275 */ 1276 if (systemId != null) { 1277 out.delete(start, out.length()); 1278 return; 1279 } 1280 1281 // keep the unresolved entity "&code;" in the text for relaxed clients 1282 unresolved = true; 1283 if (throwOnResolveFailure) { 1284 checkRelaxed("unresolved: &" + code + ";"); 1285 } 1286 } 1287 1288 /** 1289 * Where a value is found impacts how that value is interpreted. For 1290 * example, in attributes, "\n" must be replaced with a space character. In 1291 * text, "]]>" is forbidden. 
In entity declarations, named references are 1292 * not resolved. 1293 */ 1294 enum ValueContext { 1295 ATTRIBUTE, 1296 TEXT, 1297 ENTITY_DECLARATION 1298 } 1299 1300 /** 1301 * Returns the current text or attribute value. This also has the side 1302 * effect of setting isWhitespace to false if a non-whitespace character is 1303 * encountered. 1304 * 1305 * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted 1306 * attributes, or a space for unquoted attributes. 1307 */ 1308 private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure, 1309 ValueContext valueContext) throws IOException, XmlPullParserException { 1310 1311 /* 1312 * This method returns all of the characters from the current position 1313 * through to an appropriate delimiter. 1314 * 1315 * If we're lucky (which we usually are), we'll return a single slice of 1316 * the buffer. This fast path avoids allocating a string builder. 1317 * 1318 * There are 6 unlucky characters we could encounter: 1319 * - "&": entities must be resolved. 1320 * - "%": parameter entities are unsupported in entity values. 1321 * - "<": this isn't permitted in attributes unless relaxed. 1322 * - "]": this requires a lookahead to defend against the forbidden 1323 * CDATA section delimiter "]]>". 1324 * - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it 1325 * isn't followed by "\n", we replace "\r" with either a "\n" 1326 * in text nodes or a space in attribute values. 1327 * - "\n": In attribute values, "\n" must be replaced with a space. 1328 * 1329 * We could also get unlucky by needing to refill the buffer midway 1330 * through the text. 
1331 */ 1332 1333 int start = position; 1334 StringBuilder result = null; 1335 1336 // if a text section was already started, prefix the start 1337 if (valueContext == ValueContext.TEXT && text != null) { 1338 result = new StringBuilder(); 1339 result.append(text); 1340 } 1341 1342 while (true) { 1343 1344 /* 1345 * Make sure we have at least a single character to read from the 1346 * buffer. This mutates the buffer, so save the partial result 1347 * to the slow path string builder first. 1348 */ 1349 if (position >= limit) { 1350 if (start < position) { 1351 if (result == null) { 1352 result = new StringBuilder(); 1353 } 1354 result.append(buffer, start, position - start); 1355 } 1356 if (!fillBuffer(1)) { 1357 return result != null ? result.toString() : ""; 1358 } 1359 start = position; 1360 } 1361 1362 char c = buffer[position]; 1363 1364 if (c == delimiter 1365 || (delimiter == ' ' && (c <= ' ' || c == '>')) 1366 || c == '&' && !resolveEntities) { 1367 break; 1368 } 1369 1370 if (c != '\r' 1371 && (c != '\n' || valueContext != ValueContext.ATTRIBUTE) 1372 && c != '&' 1373 && c != '<' 1374 && (c != ']' || valueContext != ValueContext.TEXT) 1375 && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) { 1376 isWhitespace &= (c <= ' '); 1377 position++; 1378 continue; 1379 } 1380 1381 /* 1382 * We've encountered an unlucky character! Convert from fast 1383 * path to slow path if we haven't done so already. 1384 */ 1385 if (result == null) { 1386 result = new StringBuilder(); 1387 } 1388 result.append(buffer, start, position - start); 1389 1390 if (c == '\r') { 1391 if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') { 1392 position++; 1393 } 1394 c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n'; 1395 1396 } else if (c == '\n') { 1397 c = ' '; 1398 1399 } else if (c == '&') { 1400 isWhitespace = false; // TODO: what if the entity resolves to whitespace? 
1401 readEntity(result, false, throwOnResolveFailure, valueContext); 1402 start = position; 1403 continue; 1404 1405 } else if (c == '<') { 1406 if (valueContext == ValueContext.ATTRIBUTE) { 1407 checkRelaxed("Illegal: \"<\" inside attribute value"); 1408 } 1409 isWhitespace = false; 1410 1411 } else if (c == ']') { 1412 if ((position + 2 < limit || fillBuffer(3)) 1413 && buffer[position + 1] == ']' && buffer[position + 2] == '>') { 1414 checkRelaxed("Illegal: \"]]>\" outside CDATA section"); 1415 } 1416 isWhitespace = false; 1417 1418 } else if (c == '%') { 1419 throw new XmlPullParserException("This parser doesn't support parameter entities", 1420 this, null); 1421 1422 } else { 1423 throw new AssertionError(); 1424 } 1425 1426 position++; 1427 result.append(c); 1428 start = position; 1429 } 1430 1431 if (result == null) { 1432 return stringPool.get(buffer, start, position - start); 1433 } else { 1434 result.append(buffer, start, position - start); 1435 return result.toString(); 1436 } 1437 } 1438 1439 private void read(char expected) throws IOException, XmlPullParserException { 1440 int c = peekCharacter(); 1441 if (c != expected) { 1442 checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'"); 1443 if (c == -1) { 1444 return; // On EOF, don't move position beyond limit 1445 } 1446 } 1447 position++; 1448 } 1449 1450 private void read(char[] chars) throws IOException, XmlPullParserException { 1451 if (position + chars.length > limit && !fillBuffer(chars.length)) { 1452 checkRelaxed("expected: '" + new String(chars) + "' but was EOF"); 1453 return; 1454 } 1455 1456 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 1457 // when the VM has better method inlining 1458 for (int i = 0; i < chars.length; i++) { 1459 if (buffer[position + i] != chars[i]) { 1460 checkRelaxed("expected: \"" + new String(chars) + "\" but was \"" 1461 + new String(buffer, position, chars.length) + "...\""); 1462 } 1463 } 1464 1465 
position += chars.length; 1466 } 1467 1468 private int peekCharacter() throws IOException, XmlPullParserException { 1469 if (position < limit || fillBuffer(1)) { 1470 return buffer[position]; 1471 } 1472 return -1; 1473 } 1474 1475 /** 1476 * Returns true once {@code limit - position >= minimum}. If the data is 1477 * exhausted before that many characters are available, this returns 1478 * false. 1479 */ 1480 private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException { 1481 // If we've exhausted the current content source, remove it 1482 while (nextContentSource != null) { 1483 if (position < limit) { 1484 throw new XmlPullParserException("Unbalanced entity!", this, null); 1485 } 1486 popContentSource(); 1487 if (limit - position >= minimum) { 1488 return true; 1489 } 1490 } 1491 1492 // Before clobbering the old characters, update where buffer starts 1493 for (int i = 0; i < position; i++) { 1494 if (buffer[i] == '\n') { 1495 bufferStartLine++; 1496 bufferStartColumn = 0; 1497 } else { 1498 bufferStartColumn++; 1499 } 1500 } 1501 1502 if (bufferCapture != null) { 1503 bufferCapture.append(buffer, 0, position); 1504 } 1505 1506 if (limit != position) { 1507 limit -= position; 1508 System.arraycopy(buffer, position, buffer, 0, limit); 1509 } else { 1510 limit = 0; 1511 } 1512 1513 position = 0; 1514 int total; 1515 while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) { 1516 limit += total; 1517 if (limit >= minimum) { 1518 return true; 1519 } 1520 } 1521 return false; 1522 } 1523 1524 /** 1525 * Returns an element or attribute name. This is always non-empty for 1526 * non-relaxed parsers. 
1527 */ 1528 private String readName() throws IOException, XmlPullParserException { 1529 if (position >= limit && !fillBuffer(1)) { 1530 checkRelaxed("name expected"); 1531 return ""; 1532 } 1533 1534 int start = position; 1535 StringBuilder result = null; 1536 1537 // read the first character 1538 char c = buffer[position]; 1539 if ((c >= 'a' && c <= 'z') 1540 || (c >= 'A' && c <= 'Z') 1541 || c == '_' 1542 || c == ':' 1543 || c >= '\u00c0' // TODO: check the XML spec 1544 || relaxed) { 1545 position++; 1546 } else { 1547 checkRelaxed("name expected"); 1548 return ""; 1549 } 1550 1551 while (true) { 1552 /* 1553 * Make sure we have at least a single character to read from the 1554 * buffer. This mutates the buffer, so save the partial result 1555 * to the slow path string builder first. 1556 */ 1557 if (position >= limit) { 1558 if (result == null) { 1559 result = new StringBuilder(); 1560 } 1561 result.append(buffer, start, position - start); 1562 if (!fillBuffer(1)) { 1563 return result.toString(); 1564 } 1565 start = position; 1566 } 1567 1568 // read another character 1569 c = buffer[position]; 1570 if ((c >= 'a' && c <= 'z') 1571 || (c >= 'A' && c <= 'Z') 1572 || (c >= '0' && c <= '9') 1573 || c == '_' 1574 || c == '-' 1575 || c == ':' 1576 || c == '.' 1577 || c >= '\u00b7') { // TODO: check the XML spec 1578 position++; 1579 continue; 1580 } 1581 1582 // we encountered a non-name character. done! 1583 if (result == null) { 1584 return stringPool.get(buffer, start, position - start); 1585 } else { 1586 result.append(buffer, start, position - start); 1587 return result.toString(); 1588 } 1589 } 1590 } 1591 1592 private void skip() throws IOException, XmlPullParserException { 1593 while (position < limit || fillBuffer(1)) { 1594 int c = buffer[position]; 1595 if (c > ' ') { 1596 break; 1597 } 1598 position++; 1599 } 1600 } 1601 1602 // public part starts here... 
1603 1604 public void setInput(Reader reader) throws XmlPullParserException { 1605 this.reader = reader; 1606 1607 type = START_DOCUMENT; 1608 name = null; 1609 namespace = null; 1610 degenerated = false; 1611 attributeCount = -1; 1612 encoding = null; 1613 version = null; 1614 standalone = null; 1615 1616 if (reader == null) { 1617 return; 1618 } 1619 1620 position = 0; 1621 limit = 0; 1622 bufferStartLine = 0; 1623 bufferStartColumn = 0; 1624 depth = 0; 1625 documentEntities = null; 1626 } 1627 1628 public void setInput(InputStream is, String charset) throws XmlPullParserException { 1629 position = 0; 1630 limit = 0; 1631 boolean detectCharset = (charset == null); 1632 1633 if (is == null) { 1634 throw new IllegalArgumentException("is == null"); 1635 } 1636 1637 try { 1638 if (detectCharset) { 1639 // read the four bytes looking for an indication of the encoding in use 1640 int firstFourBytes = 0; 1641 while (limit < 4) { 1642 int i = is.read(); 1643 if (i == -1) { 1644 break; 1645 } 1646 firstFourBytes = (firstFourBytes << 8) | i; 1647 buffer[limit++] = (char) i; 1648 } 1649 1650 if (limit == 4) { 1651 switch (firstFourBytes) { 1652 case 0x00000FEFF: // UTF-32BE BOM 1653 charset = "UTF-32BE"; 1654 limit = 0; 1655 break; 1656 1657 case 0x0FFFE0000: // UTF-32LE BOM 1658 charset = "UTF-32LE"; 1659 limit = 0; 1660 break; 1661 1662 case 0x0000003c: // '<' in UTF-32BE 1663 charset = "UTF-32BE"; 1664 buffer[0] = '<'; 1665 limit = 1; 1666 break; 1667 1668 case 0x03c000000: // '<' in UTF-32LE 1669 charset = "UTF-32LE"; 1670 buffer[0] = '<'; 1671 limit = 1; 1672 break; 1673 1674 case 0x0003c003f: // "<?" in UTF-16BE 1675 charset = "UTF-16BE"; 1676 buffer[0] = '<'; 1677 buffer[1] = '?'; 1678 limit = 2; 1679 break; 1680 1681 case 0x03c003f00: // "<?" in UTF-16LE 1682 charset = "UTF-16LE"; 1683 buffer[0] = '<'; 1684 buffer[1] = '?'; 1685 limit = 2; 1686 break; 1687 1688 case 0x03c3f786d: // "<?xm" in ASCII etc. 
1689 while (true) { 1690 int i = is.read(); 1691 if (i == -1) { 1692 break; 1693 } 1694 buffer[limit++] = (char) i; 1695 if (i == '>') { 1696 String s = new String(buffer, 0, limit); 1697 int i0 = s.indexOf("encoding"); 1698 if (i0 != -1) { 1699 while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') { 1700 i0++; 1701 } 1702 char deli = s.charAt(i0++); 1703 int i1 = s.indexOf(deli, i0); 1704 charset = s.substring(i0, i1); 1705 } 1706 break; 1707 } 1708 } 1709 break; 1710 1711 default: 1712 // handle a byte order mark followed by something other than <? 1713 if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) { 1714 charset = "UTF-16BE"; 1715 buffer[0] = (char) ((buffer[2] << 8) | buffer[3]); 1716 limit = 1; 1717 } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) { 1718 charset = "UTF-16LE"; 1719 buffer[0] = (char) ((buffer[3] << 8) | buffer[2]); 1720 limit = 1; 1721 } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) { 1722 charset = "UTF-8"; 1723 buffer[0] = buffer[3]; 1724 limit = 1; 1725 } 1726 } 1727 } 1728 } 1729 1730 if (charset == null) { 1731 charset = "UTF-8"; 1732 } 1733 1734 int savedLimit = limit; 1735 setInput(new InputStreamReader(is, charset)); 1736 encoding = charset; 1737 limit = savedLimit; 1738 1739 /* 1740 * Skip the optional BOM if we didn't above. This decrements limit 1741 * rather than incrementing position so that <?xml version='1.0'?> 1742 * is still at character 0. 
1743 */ 1744 if (!detectCharset && peekCharacter() == 0xfeff) { 1745 limit--; 1746 System.arraycopy(buffer, 1, buffer, 0, limit); 1747 } 1748 } catch (Exception e) { 1749 throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e); 1750 } 1751 } 1752 1753 public void close() throws IOException { 1754 if (reader != null) { 1755 reader.close(); 1756 } 1757 } 1758 1759 public boolean getFeature(String feature) { 1760 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 1761 return processNsp; 1762 } else if (FEATURE_RELAXED.equals(feature)) { 1763 return relaxed; 1764 } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { 1765 return processDocDecl; 1766 } else { 1767 return false; 1768 } 1769 } 1770 1771 public String getInputEncoding() { 1772 return encoding; 1773 } 1774 1775 public void defineEntityReplacementText(String entity, String value) 1776 throws XmlPullParserException { 1777 if (processDocDecl) { 1778 throw new IllegalStateException( 1779 "Entity replacement text may not be defined with DOCTYPE processing enabled."); 1780 } 1781 if (reader == null) { 1782 throw new IllegalStateException( 1783 "Entity replacement text must be defined after setInput()"); 1784 } 1785 if (documentEntities == null) { 1786 documentEntities = new HashMap<String, char[]>(); 1787 } 1788 documentEntities.put(entity, value.toCharArray()); 1789 } 1790 1791 public Object getProperty(String property) { 1792 if (property.equals(PROPERTY_XMLDECL_VERSION)) { 1793 return version; 1794 } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { 1795 return standalone; 1796 } else if (property.equals(PROPERTY_LOCATION)) { 1797 return location != null ? location : reader.toString(); 1798 } else { 1799 return null; 1800 } 1801 } 1802 1803 /** 1804 * Returns the root element's name if it was declared in the DTD. This 1805 * equals the first tag's name for valid documents. 
1806 */ 1807 public String getRootElementName() { 1808 return rootElementName; 1809 } 1810 1811 /** 1812 * Returns the document's system ID if it was declared. This is typically a 1813 * string like {@code http://www.w3.org/TR/html4/strict.dtd}. 1814 */ 1815 public String getSystemId() { 1816 return systemId; 1817 } 1818 1819 /** 1820 * Returns the document's public ID if it was declared. This is typically a 1821 * string like {@code -//W3C//DTD HTML 4.01//EN}. 1822 */ 1823 public String getPublicId() { 1824 return publicId; 1825 } 1826 1827 public int getNamespaceCount(int depth) { 1828 if (depth > this.depth) { 1829 throw new IndexOutOfBoundsException(); 1830 } 1831 return nspCounts[depth]; 1832 } 1833 1834 public String getNamespacePrefix(int pos) { 1835 return nspStack[pos * 2]; 1836 } 1837 1838 public String getNamespaceUri(int pos) { 1839 return nspStack[(pos * 2) + 1]; 1840 } 1841 1842 public String getNamespace(String prefix) { 1843 if ("xml".equals(prefix)) { 1844 return "http://www.w3.org/XML/1998/namespace"; 1845 } 1846 if ("xmlns".equals(prefix)) { 1847 return "http://www.w3.org/2000/xmlns/"; 1848 } 1849 1850 for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) { 1851 if (prefix == null) { 1852 if (nspStack[i] == null) { 1853 return nspStack[i + 1]; 1854 } 1855 } else if (prefix.equals(nspStack[i])) { 1856 return nspStack[i + 1]; 1857 } 1858 } 1859 return null; 1860 } 1861 1862 public int getDepth() { 1863 return depth; 1864 } 1865 1866 public String getPositionDescription() { 1867 StringBuilder buf = new StringBuilder(type < TYPES.length ? 
TYPES[type] : "unknown"); 1868 buf.append(' '); 1869 1870 if (type == START_TAG || type == END_TAG) { 1871 if (degenerated) { 1872 buf.append("(empty) "); 1873 } 1874 buf.append('<'); 1875 if (type == END_TAG) { 1876 buf.append('/'); 1877 } 1878 1879 if (prefix != null) { 1880 buf.append("{" + namespace + "}" + prefix + ":"); 1881 } 1882 buf.append(name); 1883 1884 int cnt = attributeCount * 4; 1885 for (int i = 0; i < cnt; i += 4) { 1886 buf.append(' '); 1887 if (attributes[i + 1] != null) { 1888 buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":"); 1889 } 1890 buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'"); 1891 } 1892 1893 buf.append('>'); 1894 } else if (type == IGNORABLE_WHITESPACE) { 1895 ; 1896 } else if (type != TEXT) { 1897 buf.append(getText()); 1898 } else if (isWhitespace) { 1899 buf.append("(whitespace)"); 1900 } else { 1901 String text = getText(); 1902 if (text.length() > 16) { 1903 text = text.substring(0, 16) + "..."; 1904 } 1905 buf.append(text); 1906 } 1907 1908 buf.append("@" + getLineNumber() + ":" + getColumnNumber()); 1909 if (location != null) { 1910 buf.append(" in "); 1911 buf.append(location); 1912 } else if (reader != null) { 1913 buf.append(" in "); 1914 buf.append(reader.toString()); 1915 } 1916 return buf.toString(); 1917 } 1918 1919 public int getLineNumber() { 1920 int result = bufferStartLine; 1921 for (int i = 0; i < position; i++) { 1922 if (buffer[i] == '\n') { 1923 result++; 1924 } 1925 } 1926 return result + 1; // the first line is '1' 1927 } 1928 1929 public int getColumnNumber() { 1930 int result = bufferStartColumn; 1931 for (int i = 0; i < position; i++) { 1932 if (buffer[i] == '\n') { 1933 result = 0; 1934 } else { 1935 result++; 1936 } 1937 } 1938 return result + 1; // the first column is '1' 1939 } 1940 1941 public boolean isWhitespace() throws XmlPullParserException { 1942 if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) { 1943 throw new XmlPullParserException(ILLEGAL_TYPE, 
this, null); 1944 } 1945 return isWhitespace; 1946 } 1947 1948 public String getText() { 1949 if (type < TEXT || (type == ENTITY_REF && unresolved)) { 1950 return null; 1951 } else if (text == null) { 1952 return ""; 1953 } else { 1954 return text; 1955 } 1956 } 1957 1958 public char[] getTextCharacters(int[] poslen) { 1959 String text = getText(); 1960 if (text == null) { 1961 poslen[0] = -1; 1962 poslen[1] = -1; 1963 return null; 1964 } 1965 char[] result = text.toCharArray(); 1966 poslen[0] = 0; 1967 poslen[1] = result.length; 1968 return result; 1969 } 1970 1971 public String getNamespace() { 1972 return namespace; 1973 } 1974 1975 public String getName() { 1976 return name; 1977 } 1978 1979 public String getPrefix() { 1980 return prefix; 1981 } 1982 1983 public boolean isEmptyElementTag() throws XmlPullParserException { 1984 if (type != START_TAG) { 1985 throw new XmlPullParserException(ILLEGAL_TYPE, this, null); 1986 } 1987 return degenerated; 1988 } 1989 1990 public int getAttributeCount() { 1991 return attributeCount; 1992 } 1993 1994 public String getAttributeType(int index) { 1995 return "CDATA"; 1996 } 1997 1998 public boolean isAttributeDefault(int index) { 1999 return false; 2000 } 2001 2002 public String getAttributeNamespace(int index) { 2003 if (index >= attributeCount) { 2004 throw new IndexOutOfBoundsException(); 2005 } 2006 return attributes[index * 4]; 2007 } 2008 2009 public String getAttributeName(int index) { 2010 if (index >= attributeCount) { 2011 throw new IndexOutOfBoundsException(); 2012 } 2013 return attributes[(index * 4) + 2]; 2014 } 2015 2016 public String getAttributePrefix(int index) { 2017 if (index >= attributeCount) { 2018 throw new IndexOutOfBoundsException(); 2019 } 2020 return attributes[(index * 4) + 1]; 2021 } 2022 2023 public String getAttributeValue(int index) { 2024 if (index >= attributeCount) { 2025 throw new IndexOutOfBoundsException(); 2026 } 2027 return attributes[(index * 4) + 3]; 2028 } 2029 2030 public String 
getAttributeValue(String namespace, String name) { 2031 for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) { 2032 if (attributes[i + 2].equals(name) 2033 && (namespace == null || attributes[i].equals(namespace))) { 2034 return attributes[i + 3]; 2035 } 2036 } 2037 2038 return null; 2039 } 2040 2041 public int getEventType() throws XmlPullParserException { 2042 return type; 2043 } 2044 2045 // utility methods to make XML parsing easier ... 2046 2047 public int nextTag() throws XmlPullParserException, IOException { 2048 next(); 2049 if (type == TEXT && isWhitespace) { 2050 next(); 2051 } 2052 2053 if (type != END_TAG && type != START_TAG) { 2054 throw new XmlPullParserException("unexpected type", this, null); 2055 } 2056 2057 return type; 2058 } 2059 2060 public void require(int type, String namespace, String name) 2061 throws XmlPullParserException, IOException { 2062 if (type != this.type 2063 || (namespace != null && !namespace.equals(getNamespace())) 2064 || (name != null && !name.equals(getName()))) { 2065 throw new XmlPullParserException( 2066 "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null); 2067 } 2068 } 2069 2070 public String nextText() throws XmlPullParserException, IOException { 2071 if (type != START_TAG) { 2072 throw new XmlPullParserException("precondition: START_TAG", this, null); 2073 } 2074 2075 next(); 2076 2077 String result; 2078 if (type == TEXT) { 2079 result = getText(); 2080 next(); 2081 } else { 2082 result = ""; 2083 } 2084 2085 if (type != END_TAG) { 2086 throw new XmlPullParserException("END_TAG expected", this, null); 2087 } 2088 2089 return result; 2090 } 2091 2092 public void setFeature(String feature, boolean value) throws XmlPullParserException { 2093 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 2094 processNsp = value; 2095 } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { 2096 processDocDecl = value; 2097 } else if (FEATURE_RELAXED.equals(feature)) { 2098 relaxed = 
value; 2099 } else { 2100 throw new XmlPullParserException("unsupported feature: " + feature, this, null); 2101 } 2102 } 2103 2104 public void setProperty(String property, Object value) throws XmlPullParserException { 2105 if (property.equals(PROPERTY_LOCATION)) { 2106 location = String.valueOf(value); 2107 } else { 2108 throw new XmlPullParserException("unsupported property: " + property); 2109 } 2110 } 2111 2112 /** 2113 * A chain of buffers containing XML content. Each content source contains 2114 * the parser's primary read buffer or the characters of entities actively 2115 * being parsed. 2116 * 2117 * <p>For example, note the buffers needed to parse this document: 2118 * <pre> {@code 2119 * <!DOCTYPE foo [ 2120 * <!ENTITY baz "ghi"> 2121 * <!ENTITY bar "def &baz; jkl"> 2122 * ]> 2123 * <foo>abc &bar; mno</foo> 2124 * }</pre> 2125 * 2126 * <p>Things get interesting when the bar entity is encountered. At that 2127 * point two buffers are active: 2128 * <ol> 2129 * <li>The value for the bar entity, containing {@code "def &baz; jkl"} 2130 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2131 * </ol> 2132 * <p>The parser will return the characters {@code "def "} from the bar 2133 * entity's buffer, and then it will encounter the baz entity. To handle 2134 * that, three buffers will be active: 2135 * <ol> 2136 * <li>The value for the baz entity, containing {@code "ghi"} 2137 * <li>The remaining value for the bar entity, containing {@code " jkl"} 2138 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2139 * </ol> 2140 * <p>The parser will then return the characters {@code ghi jkl mno} in that 2141 * sequence by reading each buffer in sequence. 
2142 */ 2143 static class ContentSource { 2144 private final ContentSource next; 2145 private final char[] buffer; 2146 private final int position; 2147 private final int limit; 2148 ContentSource(ContentSource next, char[] buffer, int position, int limit) { 2149 this.next = next; 2150 this.buffer = buffer; 2151 this.position = position; 2152 this.limit = limit; 2153 } 2154 } 2155 2156 /** 2157 * Prepends the characters of {@code newBuffer} to be read before the 2158 * current buffer. 2159 */ 2160 private void pushContentSource(char[] newBuffer) { 2161 nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); 2162 buffer = newBuffer; 2163 position = 0; 2164 limit = newBuffer.length; 2165 } 2166 2167 /** 2168 * Replaces the current exhausted buffer with the next buffer in the chain. 2169 */ 2170 private void popContentSource() { 2171 buffer = nextContentSource.buffer; 2172 position = nextContentSource.position; 2173 limit = nextContentSource.limit; 2174 nextContentSource = nextContentSource.next; 2175 } 2176 } 2177