1 /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. */ 20 21 // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode) 22 23 package org.kxml2.io; 24 25 import java.io.Closeable; 26 import java.io.IOException; 27 import java.io.InputStream; 28 import java.io.InputStreamReader; 29 import java.io.Reader; 30 import java.util.HashMap; 31 import java.util.Map; 32 import libcore.internal.StringPool; 33 import org.xmlpull.v1.XmlPullParser; 34 import org.xmlpull.v1.XmlPullParserException; 35 36 /** 37 * An XML pull parser with limited support for parsing internal DTDs. 38 */ 39 public class KXmlParser implements XmlPullParser, Closeable { 40 41 private static final String PROPERTY_XMLDECL_VERSION 42 = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version"; 43 private static final String PROPERTY_XMLDECL_STANDALONE 44 = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone"; 45 private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location"; 46 private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed"; 47 48 private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>(); 49 static { 50 DEFAULT_ENTITIES.put("lt", "<"); 51 DEFAULT_ENTITIES.put("gt", ">"); 52 DEFAULT_ENTITIES.put("amp", "&"); 53 DEFAULT_ENTITIES.put("apos", "'"); 54 DEFAULT_ENTITIES.put("quot", "\""); 55 } 56 57 private static final int ELEMENTDECL = 11; 58 private static final int ENTITYDECL = 12; 59 private static final int ATTLISTDECL = 13; 60 private static final int NOTATIONDECL = 14; 61 private static final int PARAMETER_ENTITY_REF = 15; 62 private static final char[] START_COMMENT = { '<', '!', '-', '-' }; 63 private static final char[] END_COMMENT = { '-', '-', '>' }; 64 private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' }; 65 private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' }; 66 private static final char[] END_CDATA = { ']', ']', '>' }; 67 private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' }; 68 private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' }; 69 private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' }; 70 private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' }; 71 private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' }; 72 private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' }; 73 private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' }; 74 private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' }; 75 private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' }; 76 private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' }; 77 private static final char[] ANY = new char[]{ 'A', 'N', 'Y' }; 78 private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' }; 79 private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' }; 80 private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' }; 81 private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' }; 82 private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' }; 83 84 static final private String UNEXPECTED_EOF = "Unexpected EOF"; 85 static final private String ILLEGAL_TYPE = "Wrong event type"; 86 static final private int XML_DECLARATION = 998; 87 88 // general 89 private String location; 90 91 private String version; 92 private Boolean standalone; 93 private String rootElementName; 94 private String systemId; 95 private String publicId; 96 97 /** 98 * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines 99 * entity values and default attribute values. These values are parsed at 100 * inclusion time and may contain both tags and entity references. 101 * 102 * <p>If this is false, the user must {@link #defineEntityReplacementText 103 * define entity values manually}. Such entity values are literal strings 104 * and will not be parsed. There is no API to define default attributes 105 * manually. 106 */ 107 private boolean processDocDecl; 108 private boolean processNsp; 109 private boolean relaxed; 110 private boolean keepNamespaceAttributes; 111 112 /** 113 * If non-null, the contents of the read buffer must be copied into this 114 * string builder before the read buffer is overwritten. This is used to 115 * capture the raw DTD text while parsing the DTD. 116 */ 117 private StringBuilder bufferCapture; 118 119 /** 120 * Entities defined in or for this document. This map is created lazily. 121 */ 122 private Map<String, char[]> documentEntities; 123 124 /** 125 * Default attributes in this document. The outer map's key is the element 126 * name; the inner map's key is the attribute name. Both keys should be 127 * without namespace adjustments. This map is created lazily. 128 */ 129 private Map<String, Map<String, String>> defaultAttributes; 130 131 132 private int depth; 133 private String[] elementStack = new String[16]; 134 private String[] nspStack = new String[8]; 135 private int[] nspCounts = new int[4]; 136 137 // source 138 139 private Reader reader; 140 private String encoding; 141 private ContentSource nextContentSource; 142 private char[] buffer = new char[8192]; 143 private int position = 0; 144 private int limit = 0; 145 146 /* 147 * Track the number of newlines and columns preceding the current buffer. To 148 * compute the line and column of a position in the buffer, compute the line 149 * and column in the buffer and add the preceding values. 150 */ 151 private int bufferStartLine; 152 private int bufferStartColumn; 153 154 // the current token 155 156 private int type; 157 private boolean isWhitespace; 158 private String namespace; 159 private String prefix; 160 private String name; 161 private String text; 162 163 private boolean degenerated; 164 private int attributeCount; 165 166 /* 167 * The current element's attributes arranged in groups of 4: 168 * i + 0 = attribute namespace URI 169 * i + 1 = attribute namespace prefix 170 * i + 2 = attribute qualified name (may contain ":", as in "html:h1") 171 * i + 3 = attribute value 172 */ 173 private String[] attributes = new String[16]; 174 175 private String error; 176 177 private boolean unresolved; 178 179 public final StringPool stringPool = new StringPool(); 180 181 /** 182 * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"} 183 * in pulled elements. Most applications will only be interested in the effective namespaces of 184 * their elements, so these attributes aren't useful. But for structure preserving wrappers like 185 * DOM, it is necessary to keep the namespace data around. 186 */ 187 public void keepNamespaceAttributes() { 188 this.keepNamespaceAttributes = true; 189 } 190 191 private boolean adjustNsp() throws XmlPullParserException { 192 boolean any = false; 193 194 for (int i = 0; i < attributeCount << 2; i += 4) { 195 String attrName = attributes[i + 2]; 196 int cut = attrName.indexOf(':'); 197 String prefix; 198 199 if (cut != -1) { 200 prefix = attrName.substring(0, cut); 201 attrName = attrName.substring(cut + 1); 202 } else if (attrName.equals("xmlns")) { 203 prefix = attrName; 204 attrName = null; 205 } else { 206 continue; 207 } 208 209 if (!prefix.equals("xmlns")) { 210 any = true; 211 } else { 212 int j = (nspCounts[depth]++) << 1; 213 214 nspStack = ensureCapacity(nspStack, j + 2); 215 nspStack[j] = attrName; 216 nspStack[j + 1] = attributes[i + 3]; 217 218 if (attrName != null && attributes[i + 3].isEmpty()) { 219 checkRelaxed("illegal empty namespace"); 220 } 221 222 if (keepNamespaceAttributes) { 223 // explicitly set the namespace for unprefixed attributes 224 // such as xmlns="http://foo" 225 attributes[i] = "http://www.w3.org/2000/xmlns/"; 226 any = true; 227 } else { 228 System.arraycopy( 229 attributes, 230 i + 4, 231 attributes, 232 i, 233 ((--attributeCount) << 2) - i); 234 235 i -= 4; 236 } 237 } 238 } 239 240 if (any) { 241 for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) { 242 243 String attrName = attributes[i + 2]; 244 int cut = attrName.indexOf(':'); 245 246 if (cut == 0 && !relaxed) { 247 throw new RuntimeException( 248 "illegal attribute name: " + attrName + " at " + this); 249 } else if (cut != -1) { 250 String attrPrefix = attrName.substring(0, cut); 251 252 attrName = attrName.substring(cut + 1); 253 254 String attrNs = getNamespace(attrPrefix); 255 256 if (attrNs == null && !relaxed) { 257 throw new RuntimeException( 258 "Undefined Prefix: " + attrPrefix + " in " + this); 259 } 260 261 attributes[i] = attrNs; 262 attributes[i + 1] = attrPrefix; 263 attributes[i + 2] = attrName; 264 } 265 } 266 } 267 268 int cut = name.indexOf(':'); 269 270 if (cut == 0) { 271 checkRelaxed("illegal tag name: " + name); 272 } 273 274 if (cut != -1) { 275 prefix = name.substring(0, cut); 276 name = name.substring(cut + 1); 277 } 278 279 this.namespace = getNamespace(prefix); 280 281 if (this.namespace == null) { 282 if (prefix != null) { 283 checkRelaxed("undefined prefix: " + prefix); 284 } 285 this.namespace = NO_NAMESPACE; 286 } 287 288 return any; 289 } 290 291 private String[] ensureCapacity(String[] arr, int required) { 292 if (arr.length >= required) { 293 return arr; 294 } 295 String[] bigger = new String[required + 16]; 296 System.arraycopy(arr, 0, bigger, 0, arr.length); 297 return bigger; 298 } 299 300 private void checkRelaxed(String errorMessage) throws XmlPullParserException { 301 if (!relaxed) { 302 throw new XmlPullParserException(errorMessage, this, null); 303 } 304 if (error == null) { 305 error = "Error: " + errorMessage; 306 } 307 } 308 309 public int next() throws XmlPullParserException, IOException { 310 return next(false); 311 } 312 313 public int nextToken() throws XmlPullParserException, IOException { 314 return next(true); 315 } 316 317 private int next(boolean justOneToken) throws IOException, XmlPullParserException { 318 if (reader == null) { 319 throw new XmlPullParserException("setInput() must be called first.", this, null); 320 } 321 322 if (type == END_TAG) { 323 depth--; 324 } 325 326 // degenerated needs to be handled before error because of possible 327 // processor expectations(!) 328 329 if (degenerated) { 330 degenerated = false; 331 type = END_TAG; 332 return type; 333 } 334 335 if (error != null) { 336 if (justOneToken) { 337 text = error; 338 type = COMMENT; 339 error = null; 340 return type; 341 } else { 342 error = null; 343 } 344 } 345 346 type = peekType(false); 347 348 if (type == XML_DECLARATION) { 349 readXmlDeclaration(); 350 type = peekType(false); 351 } 352 353 text = null; 354 isWhitespace = true; 355 prefix = null; 356 name = null; 357 namespace = null; 358 attributeCount = -1; 359 boolean throwOnResolveFailure = !justOneToken; 360 361 while (true) { 362 switch (type) { 363 364 /* 365 * Return immediately after encountering a start tag, end tag, or 366 * the end of the document. 367 */ 368 case START_TAG: 369 parseStartTag(false, throwOnResolveFailure); 370 return type; 371 case END_TAG: 372 readEndTag(); 373 return type; 374 case END_DOCUMENT: 375 return type; 376 377 /* 378 * Return after any text token when we're looking for a single 379 * token. Otherwise concatenate all text between tags. 380 */ 381 case ENTITY_REF: 382 if (justOneToken) { 383 StringBuilder entityTextBuilder = new StringBuilder(); 384 readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT); 385 text = entityTextBuilder.toString(); 386 break; 387 } 388 // fall-through 389 case TEXT: 390 text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT); 391 if (depth == 0 && isWhitespace) { 392 type = IGNORABLE_WHITESPACE; 393 } 394 break; 395 case CDSECT: 396 read(START_CDATA); 397 text = readUntil(END_CDATA, true); 398 break; 399 400 /* 401 * Comments, processing instructions and declarations are returned 402 * when we're looking for a single token. Otherwise they're skipped. 403 */ 404 case COMMENT: 405 String commentText = readComment(justOneToken); 406 if (justOneToken) { 407 text = commentText; 408 } 409 break; 410 case PROCESSING_INSTRUCTION: 411 read(START_PROCESSING_INSTRUCTION); 412 String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken); 413 if (justOneToken) { 414 text = processingInstruction; 415 } 416 break; 417 case DOCDECL: 418 readDoctype(justOneToken); 419 break; 420 421 default: 422 throw new XmlPullParserException("Unexpected token", this, null); 423 } 424 425 if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) { 426 throw new XmlPullParserException("Unexpected token", this, null); 427 } 428 429 if (justOneToken) { 430 return type; 431 } 432 433 if (type == IGNORABLE_WHITESPACE) { 434 text = null; 435 } 436 437 /* 438 * We've read all that we can of a non-empty text block. Always 439 * report this as text, even if it was a CDATA block or entity 440 * reference. 441 */ 442 int peek = peekType(false); 443 if (text != null && !text.isEmpty() && peek < TEXT) { 444 type = TEXT; 445 return type; 446 } 447 448 type = peek; 449 } 450 } 451 452 /** 453 * Reads text until the specified delimiter is encountered. Consumes the 454 * text and the delimiter. 455 * 456 * @param returnText true to return the read text excluding the delimiter; 457 * false to return null. 458 */ 459 private String readUntil(char[] delimiter, boolean returnText) 460 throws IOException, XmlPullParserException { 461 int start = position; 462 StringBuilder result = null; 463 464 if (returnText && text != null) { 465 result = new StringBuilder(); 466 result.append(text); 467 } 468 469 search: 470 while (true) { 471 if (position + delimiter.length >= limit) { 472 if (start < position && returnText) { 473 if (result == null) { 474 result = new StringBuilder(); 475 } 476 result.append(buffer, start, position - start); 477 } 478 if (!fillBuffer(delimiter.length)) { 479 checkRelaxed(UNEXPECTED_EOF); 480 type = COMMENT; 481 return null; 482 } 483 start = position; 484 } 485 486 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 487 // when the VM has better method inlining 488 for (int i = 0; i < delimiter.length; i++) { 489 if (buffer[position + i] != delimiter[i]) { 490 position++; 491 continue search; 492 } 493 } 494 495 break; 496 } 497 498 int end = position; 499 position += delimiter.length; 500 501 if (!returnText) { 502 return null; 503 } else if (result == null) { 504 return stringPool.get(buffer, start, end - start); 505 } else { 506 result.append(buffer, start, end - start); 507 return result.toString(); 508 } 509 } 510 511 /** 512 * Returns true if an XML declaration was read. 513 */ 514 private void readXmlDeclaration() throws IOException, XmlPullParserException { 515 if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) { 516 checkRelaxed("processing instructions must not start with xml"); 517 } 518 519 read(START_PROCESSING_INSTRUCTION); 520 parseStartTag(true, true); 521 522 if (attributeCount < 1 || !"version".equals(attributes[2])) { 523 checkRelaxed("version expected"); 524 } 525 526 version = attributes[3]; 527 528 int pos = 1; 529 530 if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) { 531 encoding = attributes[3 + 4]; 532 pos++; 533 } 534 535 if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) { 536 String st = attributes[3 + 4 * pos]; 537 if ("yes".equals(st)) { 538 standalone = Boolean.TRUE; 539 } else if ("no".equals(st)) { 540 standalone = Boolean.FALSE; 541 } else { 542 checkRelaxed("illegal standalone value: " + st); 543 } 544 pos++; 545 } 546 547 if (pos != attributeCount) { 548 checkRelaxed("unexpected attributes in XML declaration"); 549 } 550 551 isWhitespace = true; 552 text = null; 553 } 554 555 private String readComment(boolean returnText) throws IOException, XmlPullParserException { 556 read(START_COMMENT); 557 558 if (relaxed) { 559 return readUntil(END_COMMENT, returnText); 560 } 561 562 String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText); 563 if (peekCharacter() != '>') { 564 throw new XmlPullParserException("Comments may not contain --", this, null); 565 } 566 position++; 567 return commentText; 568 } 569 570 /** 571 * Read the document's DTD. Although this parser is non-validating, the DTD 572 * must be parsed to capture entity values and default attribute values. 573 */ 574 private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException { 575 read(START_DOCTYPE); 576 577 int startPosition = -1; 578 if (saveDtdText) { 579 bufferCapture = new StringBuilder(); 580 startPosition = position; 581 } 582 try { 583 skip(); 584 rootElementName = readName(); 585 readExternalId(true, true); 586 skip(); 587 if (peekCharacter() == '[') { 588 readInternalSubset(); 589 } 590 skip(); 591 } finally { 592 if (saveDtdText) { 593 bufferCapture.append(buffer, 0, position); 594 bufferCapture.delete(0, startPosition); 595 text = bufferCapture.toString(); 596 bufferCapture = null; 597 } 598 } 599 600 read('>'); 601 } 602 603 /** 604 * Reads an external ID of one of these two forms: 605 * SYSTEM "quoted system name" 606 * PUBLIC "quoted public id" "quoted system name" 607 * 608 * If the system name is not required, this also supports lone public IDs of 609 * this form: 610 * PUBLIC "quoted public id" 611 * 612 * Returns true if any ID was read. 613 */ 614 private boolean readExternalId(boolean requireSystemName, boolean assignFields) 615 throws IOException, XmlPullParserException { 616 skip(); 617 int c = peekCharacter(); 618 619 if (c == 'S') { 620 read(SYSTEM); 621 } else if (c == 'P') { 622 read(PUBLIC); 623 skip(); 624 if (assignFields) { 625 publicId = readQuotedId(true); 626 } else { 627 readQuotedId(false); 628 } 629 } else { 630 return false; 631 } 632 633 skip(); 634 635 if (!requireSystemName) { 636 int delimiter = peekCharacter(); 637 if (delimiter != '"' && delimiter != '\'') { 638 return true; // no system name! 639 } 640 } 641 642 if (assignFields) { 643 systemId = readQuotedId(true); 644 } else { 645 readQuotedId(false); 646 } 647 return true; 648 } 649 650 private static final char[] SINGLE_QUOTE = new char[] { '\'' }; 651 private static final char[] DOUBLE_QUOTE = new char[] { '"' }; 652 653 /** 654 * Reads a quoted string, performing no entity escaping of the contents. 655 */ 656 private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException { 657 int quote = peekCharacter(); 658 char[] delimiter; 659 if (quote == '"') { 660 delimiter = DOUBLE_QUOTE; 661 } else if (quote == '\'') { 662 delimiter = SINGLE_QUOTE; 663 } else { 664 throw new XmlPullParserException("Expected a quoted string", this, null); 665 } 666 position++; 667 return readUntil(delimiter, returnText); 668 } 669 670 private void readInternalSubset() throws IOException, XmlPullParserException { 671 read('['); 672 673 while (true) { 674 skip(); 675 if (peekCharacter() == ']') { 676 position++; 677 return; 678 } 679 680 int declarationType = peekType(true); 681 switch (declarationType) { 682 case ELEMENTDECL: 683 readElementDeclaration(); 684 break; 685 686 case ATTLISTDECL: 687 readAttributeListDeclaration(); 688 break; 689 690 case ENTITYDECL: 691 readEntityDeclaration(); 692 break; 693 694 case NOTATIONDECL: 695 readNotationDeclaration(); 696 break; 697 698 case PROCESSING_INSTRUCTION: 699 read(START_PROCESSING_INSTRUCTION); 700 readUntil(END_PROCESSING_INSTRUCTION, false); 701 break; 702 703 case COMMENT: 704 readComment(false); 705 break; 706 707 case PARAMETER_ENTITY_REF: 708 throw new XmlPullParserException( 709 "Parameter entity references are not supported", this, null); 710 711 default: 712 throw new XmlPullParserException("Unexpected token", this, null); 713 } 714 } 715 } 716 717 /** 718 * Read an element declaration. This contains a name and a content spec. 719 * <!ELEMENT foo EMPTY > 720 * <!ELEMENT foo (bar?,(baz|quux)) > 721 * <!ELEMENT foo (#PCDATA|bar)* > 722 */ 723 private void readElementDeclaration() throws IOException, XmlPullParserException { 724 read(START_ELEMENT); 725 skip(); 726 readName(); 727 readContentSpec(); 728 skip(); 729 read('>'); 730 } 731 732 /** 733 * Read an element content spec. This is a regular expression-like pattern 734 * of names or other content specs. The following operators are supported: 735 * sequence: (a,b,c) 736 * choice: (a|b|c) 737 * optional: a? 738 * one or more: a+ 739 * any number: a* 740 * 741 * The special name '#PCDATA' is permitted but only if it is the first 742 * element of the first group: 743 * (#PCDATA|a|b) 744 * 745 * The top-level element must be either a choice, a sequence, or one of the 746 * special names EMPTY and ANY. 747 */ 748 private void readContentSpec() throws IOException, XmlPullParserException { 749 // this implementation is very lenient; it scans for balanced parens only 750 skip(); 751 int c = peekCharacter(); 752 if (c == '(') { 753 int depth = 0; 754 do { 755 if (c == '(') { 756 depth++; 757 } else if (c == ')') { 758 depth--; 759 } 760 position++; 761 c = peekCharacter(); 762 } while (depth > 0); 763 764 if (c == '*' || c == '?' || c == '+') { 765 position++; 766 } 767 } else if (c == EMPTY[0]) { 768 read(EMPTY); 769 } else if (c == ANY[0]) { 770 read(ANY); 771 } else { 772 throw new XmlPullParserException("Expected element content spec", this, null); 773 } 774 } 775 776 /** 777 * Reads an attribute list declaration such as the following: 778 * <!ATTLIST foo 779 * bar CDATA #IMPLIED 780 * quux (a|b|c) "c" 781 * baz NOTATION (a|b|c) #FIXED "c"> 782 * 783 * Each attribute has a name, type and default. 784 * 785 * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY, 786 * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)" 787 * or NOTATION followed by an enumerated type. 788 * 789 * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or 790 * #FIXED with a quoted value. 791 */ 792 private void readAttributeListDeclaration() throws IOException, XmlPullParserException { 793 read(START_ATTLIST); 794 skip(); 795 String elementName = readName(); 796 797 while (true) { 798 skip(); 799 int c = peekCharacter(); 800 if (c == '>') { 801 position++; 802 return; 803 } 804 805 // attribute name 806 String attributeName = readName(); 807 808 // attribute type 809 skip(); 810 if (position + 1 >= limit && !fillBuffer(2)) { 811 throw new XmlPullParserException("Malformed attribute list", this, null); 812 } 813 if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) { 814 read(NOTATION); 815 skip(); 816 } 817 c = peekCharacter(); 818 if (c == '(') { 819 position++; 820 while (true) { 821 skip(); 822 readName(); 823 skip(); 824 c = peekCharacter(); 825 if (c == ')') { 826 position++; 827 break; 828 } else if (c == '|') { 829 position++; 830 } else { 831 throw new XmlPullParserException("Malformed attribute type", this, null); 832 } 833 } 834 } else { 835 readName(); 836 } 837 838 // default value 839 skip(); 840 c = peekCharacter(); 841 if (c == '#') { 842 position++; 843 c = peekCharacter(); 844 if (c == 'R') { 845 read(REQUIRED); 846 } else if (c == 'I') { 847 read(IMPLIED); 848 } else if (c == 'F') { 849 read(FIXED); 850 } else { 851 throw new XmlPullParserException("Malformed attribute type", this, null); 852 } 853 skip(); 854 c = peekCharacter(); 855 } 856 if (c == '"' || c == '\'') { 857 position++; 858 // TODO: does this do escaping correctly? 859 String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE); 860 position++; 861 defineAttributeDefault(elementName, attributeName, value); 862 } 863 } 864 } 865 866 private void defineAttributeDefault(String elementName, String attributeName, String value) { 867 if (defaultAttributes == null) { 868 defaultAttributes = new HashMap<String, Map<String, String>>(); 869 } 870 Map<String, String> elementAttributes = defaultAttributes.get(elementName); 871 if (elementAttributes == null) { 872 elementAttributes = new HashMap<String, String>(); 873 defaultAttributes.put(elementName, elementAttributes); 874 } 875 elementAttributes.put(attributeName, value); 876 } 877 878 /** 879 * Read an entity declaration. The value of internal entities are inline: 880 * <!ENTITY foo "bar"> 881 * 882 * The values of external entities must be retrieved by URL or path: 883 * <!ENTITY foo SYSTEM "http://host/file"> 884 * <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file"> 885 * <!ENTITY foo SYSTEM "../file.png" NDATA png> 886 * 887 * Entities may be general or parameterized. Parameterized entities are 888 * marked by a percent sign. Such entities may only be used in the DTD: 889 * <!ENTITY % foo "bar"> 890 */ 891 private void readEntityDeclaration() throws IOException, XmlPullParserException { 892 read(START_ENTITY); 893 boolean generalEntity = true; 894 895 skip(); 896 if (peekCharacter() == '%') { 897 generalEntity = false; 898 position++; 899 skip(); 900 } 901 902 String name = readName(); 903 904 skip(); 905 int quote = peekCharacter(); 906 String entityValue; 907 if (quote == '"' || quote == '\'') { 908 position++; 909 entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION); 910 position++; 911 } else if (readExternalId(true, false)) { 912 /* 913 * Map external entities to the empty string. This is dishonest, 914 * but it's consistent with Android's Expat pull parser. 915 */ 916 entityValue = ""; 917 skip(); 918 if (peekCharacter() == NDATA[0]) { 919 read(NDATA); 920 skip(); 921 readName(); 922 } 923 } else { 924 throw new XmlPullParserException("Expected entity value or external ID", this, null); 925 } 926 927 if (generalEntity && processDocDecl) { 928 if (documentEntities == null) { 929 documentEntities = new HashMap<String, char[]>(); 930 } 931 documentEntities.put(name, entityValue.toCharArray()); 932 } 933 934 skip(); 935 read('>'); 936 } 937 938 private void readNotationDeclaration() throws IOException, XmlPullParserException { 939 read(START_NOTATION); 940 skip(); 941 readName(); 942 if (!readExternalId(false, false)) { 943 throw new XmlPullParserException( 944 "Expected external ID or public ID for notation", this, null); 945 } 946 skip(); 947 read('>'); 948 } 949 950 private void readEndTag() throws IOException, XmlPullParserException { 951 read('<'); 952 read('/'); 953 name = readName(); // TODO: pass the expected name in as a hint? 954 skip(); 955 read('>'); 956 957 int sp = (depth - 1) * 4; 958 959 if (depth == 0) { 960 checkRelaxed("read end tag " + name + " with no tags open"); 961 type = COMMENT; 962 return; 963 } 964 965 if (name.equals(elementStack[sp + 3])) { 966 namespace = elementStack[sp]; 967 prefix = elementStack[sp + 1]; 968 name = elementStack[sp + 2]; 969 } else if (!relaxed) { 970 throw new XmlPullParserException( 971 "expected: /" + elementStack[sp + 3] + " read: " + name, this, null); 972 } 973 } 974 975 /** 976 * Returns the type of the next token. 977 */ 978 private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException { 979 if (position >= limit && !fillBuffer(1)) { 980 return END_DOCUMENT; 981 } 982 983 switch (buffer[position]) { 984 case '&': 985 return ENTITY_REF; // & 986 case '<': 987 if (position + 3 >= limit && !fillBuffer(4)) { 988 throw new XmlPullParserException("Dangling <", this, null); 989 } 990 991 switch (buffer[position + 1]) { 992 case '/': 993 return END_TAG; // </ 994 case '?': 995 // we're looking for "<?xml " with case insensitivity 996 if ((position + 5 < limit || fillBuffer(6)) 997 && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X') 998 && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M') 999 && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L') 1000 && (buffer[position + 5] == ' ')) { 1001 return XML_DECLARATION; // <?xml 1002 } else { 1003 return PROCESSING_INSTRUCTION; // <? 1004 } 1005 case '!': 1006 switch (buffer[position + 2]) { 1007 case 'D': 1008 return DOCDECL; // <!D 1009 case '[': 1010 return CDSECT; // <![ 1011 case '-': 1012 return COMMENT; // <!- 1013 case 'E': 1014 switch (buffer[position + 3]) { 1015 case 'L': 1016 return ELEMENTDECL; // <!EL 1017 case 'N': 1018 return ENTITYDECL; // <!EN 1019 } 1020 break; 1021 case 'A': 1022 return ATTLISTDECL; // <!A 1023 case 'N': 1024 return NOTATIONDECL; // <!N 1025 } 1026 throw new XmlPullParserException("Unexpected <!", this, null); 1027 default: 1028 return START_TAG; // < 1029 } 1030 case '%': 1031 return inDeclaration ? PARAMETER_ENTITY_REF : TEXT; 1032 default: 1033 return TEXT; 1034 } 1035 } 1036 1037 /** 1038 * Sets name and attributes 1039 */ 1040 private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure) 1041 throws IOException, XmlPullParserException { 1042 if (!xmldecl) { 1043 read('<'); 1044 } 1045 name = readName(); 1046 attributeCount = 0; 1047 1048 while (true) { 1049 skip(); 1050 1051 if (position >= limit && !fillBuffer(1)) { 1052 checkRelaxed(UNEXPECTED_EOF); 1053 return; 1054 } 1055 1056 int c = buffer[position]; 1057 1058 if (xmldecl) { 1059 if (c == '?') { 1060 position++; 1061 read('>'); 1062 return; 1063 } 1064 } else { 1065 if (c == '/') { 1066 degenerated = true; 1067 position++; 1068 skip(); 1069 read('>'); 1070 break; 1071 } else if (c == '>') { 1072 position++; 1073 break; 1074 } 1075 } 1076 1077 String attrName = readName(); 1078 1079 int i = (attributeCount++) * 4; 1080 attributes = ensureCapacity(attributes, i + 4); 1081 attributes[i] = ""; 1082 attributes[i + 1] = null; 1083 attributes[i + 2] = attrName; 1084 1085 skip(); 1086 if (position >= limit && !fillBuffer(1)) { 1087 checkRelaxed(UNEXPECTED_EOF); 1088 return; 1089 } 1090 1091 if (buffer[position] == '=') { 1092 position++; 1093 1094 skip(); 1095 if (position >= limit && !fillBuffer(1)) { 1096 checkRelaxed(UNEXPECTED_EOF); 1097 return; 1098 } 1099 char delimiter = buffer[position]; 1100 1101 if (delimiter == '\'' || delimiter == '"') { 1102 position++; 1103 } else if (relaxed) { 1104 delimiter = ' '; 1105 } else { 1106 throw new XmlPullParserException("attr value delimiter missing!", this, null); 1107 } 1108 1109 attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure, 1110 ValueContext.ATTRIBUTE); 1111 1112 if (delimiter != ' ') { 1113 position++; // end quote 1114 } 1115 } else if (relaxed) { 1116 attributes[i + 3] = attrName; 1117 } else { 1118 checkRelaxed("Attr.value missing f. " + attrName); 1119 attributes[i + 3] = attrName; 1120 } 1121 } 1122 1123 int sp = depth++ * 4; 1124 elementStack = ensureCapacity(elementStack, sp + 4); 1125 elementStack[sp + 3] = name; 1126 1127 if (depth >= nspCounts.length) { 1128 int[] bigger = new int[depth + 4]; 1129 System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length); 1130 nspCounts = bigger; 1131 } 1132 1133 nspCounts[depth] = nspCounts[depth - 1]; 1134 1135 if (processNsp) { 1136 adjustNsp(); 1137 } else { 1138 namespace = ""; 1139 } 1140 1141 // For consistency with Expat, add default attributes after fixing namespaces. 1142 if (defaultAttributes != null) { 1143 Map<String, String> elementDefaultAttributes = defaultAttributes.get(name); 1144 if (elementDefaultAttributes != null) { 1145 for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) { 1146 if (getAttributeValue(null, entry.getKey()) != null) { 1147 continue; // an explicit value overrides the default 1148 } 1149 1150 int i = (attributeCount++) * 4; 1151 attributes = ensureCapacity(attributes, i + 4); 1152 attributes[i] = ""; 1153 attributes[i + 1] = null; 1154 attributes[i + 2] = entry.getKey(); 1155 attributes[i + 3] = entry.getValue(); 1156 } 1157 } 1158 } 1159 1160 elementStack[sp] = namespace; 1161 elementStack[sp + 1] = prefix; 1162 elementStack[sp + 2] = name; 1163 } 1164 1165 /** 1166 * Reads an entity reference from the buffer, resolves it, and writes the 1167 * resolved entity to {@code out}. If the entity cannot be read or resolved, 1168 * {@code out} will contain the partial entity reference. 1169 */ 1170 private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure, 1171 ValueContext valueContext) throws IOException, XmlPullParserException { 1172 int start = out.length(); 1173 1174 if (buffer[position++] != '&') { 1175 throw new AssertionError(); 1176 } 1177 1178 out.append('&'); 1179 1180 while (true) { 1181 int c = peekCharacter(); 1182 1183 if (c == ';') { 1184 out.append(';'); 1185 position++; 1186 break; 1187 1188 } else if (c >= 128 1189 || (c >= '0' && c <= '9') 1190 || (c >= 'a' && c <= 'z') 1191 || (c >= 'A' && c <= 'Z') 1192 || c == '_' 1193 || c == '-' 1194 || c == '#') { 1195 position++; 1196 out.append((char) c); 1197 1198 } else if (relaxed) { 1199 // intentionally leave the partial reference in 'out' 1200 return; 1201 1202 } else { 1203 throw new XmlPullParserException("unterminated entity ref", this, null); 1204 } 1205 } 1206 1207 String code = out.substring(start + 1, out.length() - 1); 1208 1209 if (isEntityToken) { 1210 name = code; 1211 } 1212 1213 if (code.startsWith("#")) { 1214 try { 1215 int c = code.startsWith("#x") 1216 ? Integer.parseInt(code.substring(2), 16) 1217 : Integer.parseInt(code.substring(1)); 1218 out.delete(start, out.length()); 1219 out.appendCodePoint(c); 1220 unresolved = false; 1221 return; 1222 } catch (NumberFormatException notANumber) { 1223 throw new XmlPullParserException("Invalid character reference: &" + code); 1224 } catch (IllegalArgumentException invalidCodePoint) { 1225 throw new XmlPullParserException("Invalid character reference: &" + code); 1226 } 1227 } 1228 1229 if (valueContext == ValueContext.ENTITY_DECLARATION) { 1230 // keep the unresolved &code; in the text to resolve later 1231 return; 1232 } 1233 1234 String defaultEntity = DEFAULT_ENTITIES.get(code); 1235 if (defaultEntity != null) { 1236 out.delete(start, out.length()); 1237 unresolved = false; 1238 out.append(defaultEntity); 1239 return; 1240 } 1241 1242 char[] resolved; 1243 if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { 1244 out.delete(start, out.length()); 1245 unresolved = false; 1246 if (processDocDecl) { 1247 pushContentSource(resolved); // parse the entity as XML 1248 } else { 1249 out.append(resolved); // include the entity value as text 1250 } 1251 return; 1252 } 1253 1254 /* 1255 * The parser skipped an external DTD, and now we've encountered an 1256 * unknown entity that could have been declared there. Map it to the 1257 * empty string. This is dishonest, but it's consistent with Android's 1258 * old ExpatPullParser. 1259 */ 1260 if (systemId != null) { 1261 out.delete(start, out.length()); 1262 return; 1263 } 1264 1265 // keep the unresolved entity "&code;" in the text for relaxed clients 1266 unresolved = true; 1267 if (throwOnResolveFailure) { 1268 checkRelaxed("unresolved: &" + code + ";"); 1269 } 1270 } 1271 1272 /** 1273 * Where a value is found impacts how that value is interpreted. For 1274 * example, in attributes, "\n" must be replaced with a space character. In 1275 * text, "]]>" is forbidden. In entity declarations, named references are 1276 * not resolved. 1277 */ 1278 enum ValueContext { 1279 ATTRIBUTE, 1280 TEXT, 1281 ENTITY_DECLARATION 1282 } 1283 1284 /** 1285 * Returns the current text or attribute value. This also has the side 1286 * effect of setting isWhitespace to false if a non-whitespace character is 1287 * encountered. 1288 * 1289 * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted 1290 * attributes, or a space for unquoted attributes. 1291 */ 1292 private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure, 1293 ValueContext valueContext) throws IOException, XmlPullParserException { 1294 1295 /* 1296 * This method returns all of the characters from the current position 1297 * through to an appropriate delimiter. 1298 * 1299 * If we're lucky (which we usually are), we'll return a single slice of 1300 * the buffer. This fast path avoids allocating a string builder. 1301 * 1302 * There are 6 unlucky characters we could encounter: 1303 * - "&": entities must be resolved. 1304 * - "%": parameter entities are unsupported in entity values. 1305 * - "<": this isn't permitted in attributes unless relaxed. 1306 * - "]": this requires a lookahead to defend against the forbidden 1307 * CDATA section delimiter "]]>". 1308 * - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it 1309 * isn't followed by "\n", we replace "\r" with either a "\n" 1310 * in text nodes or a space in attribute values. 1311 * - "\n": In attribute values, "\n" must be replaced with a space. 1312 * 1313 * We could also get unlucky by needing to refill the buffer midway 1314 * through the text. 1315 */ 1316 1317 int start = position; 1318 StringBuilder result = null; 1319 1320 // if a text section was already started, prefix the start 1321 if (valueContext == ValueContext.TEXT && text != null) { 1322 result = new StringBuilder(); 1323 result.append(text); 1324 } 1325 1326 while (true) { 1327 1328 /* 1329 * Make sure we have at least a single character to read from the 1330 * buffer. This mutates the buffer, so save the partial result 1331 * to the slow path string builder first. 1332 */ 1333 if (position >= limit) { 1334 if (start < position) { 1335 if (result == null) { 1336 result = new StringBuilder(); 1337 } 1338 result.append(buffer, start, position - start); 1339 } 1340 if (!fillBuffer(1)) { 1341 return result != null ? result.toString() : ""; 1342 } 1343 start = position; 1344 } 1345 1346 char c = buffer[position]; 1347 1348 if (c == delimiter 1349 || (delimiter == ' ' && (c <= ' ' || c == '>')) 1350 || c == '&' && !resolveEntities) { 1351 break; 1352 } 1353 1354 if (c != '\r' 1355 && (c != '\n' || valueContext != ValueContext.ATTRIBUTE) 1356 && c != '&' 1357 && c != '<' 1358 && (c != ']' || valueContext != ValueContext.TEXT) 1359 && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) { 1360 isWhitespace &= (c <= ' '); 1361 position++; 1362 continue; 1363 } 1364 1365 /* 1366 * We've encountered an unlucky character! Convert from fast 1367 * path to slow path if we haven't done so already. 1368 */ 1369 if (result == null) { 1370 result = new StringBuilder(); 1371 } 1372 result.append(buffer, start, position - start); 1373 1374 if (c == '\r') { 1375 if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') { 1376 position++; 1377 } 1378 c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n'; 1379 1380 } else if (c == '\n') { 1381 c = ' '; 1382 1383 } else if (c == '&') { 1384 isWhitespace = false; // TODO: what if the entity resolves to whitespace? 1385 readEntity(result, false, throwOnResolveFailure, valueContext); 1386 start = position; 1387 continue; 1388 1389 } else if (c == '<') { 1390 if (valueContext == ValueContext.ATTRIBUTE) { 1391 checkRelaxed("Illegal: \"<\" inside attribute value"); 1392 } 1393 isWhitespace = false; 1394 1395 } else if (c == ']') { 1396 if ((position + 2 < limit || fillBuffer(3)) 1397 && buffer[position + 1] == ']' && buffer[position + 2] == '>') { 1398 checkRelaxed("Illegal: \"]]>\" outside CDATA section"); 1399 } 1400 isWhitespace = false; 1401 1402 } else if (c == '%') { 1403 throw new XmlPullParserException("This parser doesn't support parameter entities", 1404 this, null); 1405 1406 } else { 1407 throw new AssertionError(); 1408 } 1409 1410 position++; 1411 result.append(c); 1412 start = position; 1413 } 1414 1415 if (result == null) { 1416 return stringPool.get(buffer, start, position - start); 1417 } else { 1418 result.append(buffer, start, position - start); 1419 return result.toString(); 1420 } 1421 } 1422 1423 private void read(char expected) throws IOException, XmlPullParserException { 1424 int c = peekCharacter(); 1425 if (c != expected) { 1426 checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'"); 1427 } 1428 position++; 1429 } 1430 1431 private void read(char[] chars) throws IOException, XmlPullParserException { 1432 if (position + chars.length >= limit && !fillBuffer(chars.length)) { 1433 checkRelaxed("expected: '" + new String(chars) + "' but was EOF"); 1434 return; 1435 } 1436 1437 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 1438 // when the VM has better method inlining 1439 for (int i = 0; i < chars.length; i++) { 1440 if (buffer[position + i] != chars[i]) { 1441 checkRelaxed("expected: \"" + new String(chars) + "\" but was \"" 1442 + new String(buffer, position, chars.length) + "...\""); 1443 } 1444 } 1445 1446 position += chars.length; 1447 } 1448 1449 private int peekCharacter() throws IOException, XmlPullParserException { 1450 if (position < limit || fillBuffer(1)) { 1451 return buffer[position]; 1452 } 1453 return -1; 1454 } 1455 1456 /** 1457 * Returns true once {@code limit - position >= minimum}. If the data is 1458 * exhausted before that many characters are available, this returns 1459 * false. 1460 */ 1461 private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException { 1462 // If we've exhausted the current content source, remove it 1463 while (nextContentSource != null) { 1464 if (position < limit) { 1465 throw new XmlPullParserException("Unbalanced entity!", this, null); 1466 } 1467 popContentSource(); 1468 if (limit - position >= minimum) { 1469 return true; 1470 } 1471 } 1472 1473 // Before clobbering the old characters, update where buffer starts 1474 for (int i = 0; i < position; i++) { 1475 if (buffer[i] == '\n') { 1476 bufferStartLine++; 1477 bufferStartColumn = 0; 1478 } else { 1479 bufferStartColumn++; 1480 } 1481 } 1482 1483 if (bufferCapture != null) { 1484 bufferCapture.append(buffer, 0, position); 1485 } 1486 1487 if (limit != position) { 1488 limit -= position; 1489 System.arraycopy(buffer, position, buffer, 0, limit); 1490 } else { 1491 limit = 0; 1492 } 1493 1494 position = 0; 1495 int total; 1496 while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) { 1497 limit += total; 1498 if (limit >= minimum) { 1499 return true; 1500 } 1501 } 1502 return false; 1503 } 1504 1505 /** 1506 * Returns an element or attribute name. This is always non-empty for 1507 * non-relaxed parsers. 1508 */ 1509 private String readName() throws IOException, XmlPullParserException { 1510 if (position >= limit && !fillBuffer(1)) { 1511 checkRelaxed("name expected"); 1512 return ""; 1513 } 1514 1515 int start = position; 1516 StringBuilder result = null; 1517 1518 // read the first character 1519 char c = buffer[position]; 1520 if ((c >= 'a' && c <= 'z') 1521 || (c >= 'A' && c <= 'Z') 1522 || c == '_' 1523 || c == ':' 1524 || c >= '\u00c0' // TODO: check the XML spec 1525 || relaxed) { 1526 position++; 1527 } else { 1528 checkRelaxed("name expected"); 1529 return ""; 1530 } 1531 1532 while (true) { 1533 /* 1534 * Make sure we have at least a single character to read from the 1535 * buffer. This mutates the buffer, so save the partial result 1536 * to the slow path string builder first. 1537 */ 1538 if (position >= limit) { 1539 if (result == null) { 1540 result = new StringBuilder(); 1541 } 1542 result.append(buffer, start, position - start); 1543 if (!fillBuffer(1)) { 1544 return result.toString(); 1545 } 1546 start = position; 1547 } 1548 1549 // read another character 1550 c = buffer[position]; 1551 if ((c >= 'a' && c <= 'z') 1552 || (c >= 'A' && c <= 'Z') 1553 || (c >= '0' && c <= '9') 1554 || c == '_' 1555 || c == '-' 1556 || c == ':' 1557 || c == '.' 1558 || c >= '\u00b7') { // TODO: check the XML spec 1559 position++; 1560 continue; 1561 } 1562 1563 // we encountered a non-name character. done! 1564 if (result == null) { 1565 return stringPool.get(buffer, start, position - start); 1566 } else { 1567 result.append(buffer, start, position - start); 1568 return result.toString(); 1569 } 1570 } 1571 } 1572 1573 private void skip() throws IOException, XmlPullParserException { 1574 while (position < limit || fillBuffer(1)) { 1575 int c = buffer[position]; 1576 if (c > ' ') { 1577 break; 1578 } 1579 position++; 1580 } 1581 } 1582 1583 // public part starts here... 1584 1585 public void setInput(Reader reader) throws XmlPullParserException { 1586 this.reader = reader; 1587 1588 type = START_DOCUMENT; 1589 name = null; 1590 namespace = null; 1591 degenerated = false; 1592 attributeCount = -1; 1593 encoding = null; 1594 version = null; 1595 standalone = null; 1596 1597 if (reader == null) { 1598 return; 1599 } 1600 1601 position = 0; 1602 limit = 0; 1603 bufferStartLine = 0; 1604 bufferStartColumn = 0; 1605 depth = 0; 1606 documentEntities = null; 1607 } 1608 1609 public void setInput(InputStream is, String charset) throws XmlPullParserException { 1610 position = 0; 1611 limit = 0; 1612 boolean detectCharset = (charset == null); 1613 1614 if (is == null) { 1615 throw new IllegalArgumentException("is == null"); 1616 } 1617 1618 try { 1619 if (detectCharset) { 1620 // read the four bytes looking for an indication of the encoding in use 1621 int firstFourBytes = 0; 1622 while (limit < 4) { 1623 int i = is.read(); 1624 if (i == -1) { 1625 break; 1626 } 1627 firstFourBytes = (firstFourBytes << 8) | i; 1628 buffer[limit++] = (char) i; 1629 } 1630 1631 if (limit == 4) { 1632 switch (firstFourBytes) { 1633 case 0x00000FEFF: // UTF-32BE BOM 1634 charset = "UTF-32BE"; 1635 limit = 0; 1636 break; 1637 1638 case 0x0FFFE0000: // UTF-32LE BOM 1639 charset = "UTF-32LE"; 1640 limit = 0; 1641 break; 1642 1643 case 0x0000003c: // '<' in UTF-32BE 1644 charset = "UTF-32BE"; 1645 buffer[0] = '<'; 1646 limit = 1; 1647 break; 1648 1649 case 0x03c000000: // '<' in UTF-32LE 1650 charset = "UTF-32LE"; 1651 buffer[0] = '<'; 1652 limit = 1; 1653 break; 1654 1655 case 0x0003c003f: // "<?" in UTF-16BE 1656 charset = "UTF-16BE"; 1657 buffer[0] = '<'; 1658 buffer[1] = '?'; 1659 limit = 2; 1660 break; 1661 1662 case 0x03c003f00: // "<?" in UTF-16LE 1663 charset = "UTF-16LE"; 1664 buffer[0] = '<'; 1665 buffer[1] = '?'; 1666 limit = 2; 1667 break; 1668 1669 case 0x03c3f786d: // "<?xm" in ASCII etc. 1670 while (true) { 1671 int i = is.read(); 1672 if (i == -1) { 1673 break; 1674 } 1675 buffer[limit++] = (char) i; 1676 if (i == '>') { 1677 String s = new String(buffer, 0, limit); 1678 int i0 = s.indexOf("encoding"); 1679 if (i0 != -1) { 1680 while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') { 1681 i0++; 1682 } 1683 char deli = s.charAt(i0++); 1684 int i1 = s.indexOf(deli, i0); 1685 charset = s.substring(i0, i1); 1686 } 1687 break; 1688 } 1689 } 1690 break; 1691 1692 default: 1693 // handle a byte order mark followed by something other than <? 1694 if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) { 1695 charset = "UTF-16BE"; 1696 buffer[0] = (char) ((buffer[2] << 8) | buffer[3]); 1697 limit = 1; 1698 } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) { 1699 charset = "UTF-16LE"; 1700 buffer[0] = (char) ((buffer[3] << 8) | buffer[2]); 1701 limit = 1; 1702 } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) { 1703 charset = "UTF-8"; 1704 buffer[0] = buffer[3]; 1705 limit = 1; 1706 } 1707 } 1708 } 1709 } 1710 1711 if (charset == null) { 1712 charset = "UTF-8"; 1713 } 1714 1715 int savedLimit = limit; 1716 setInput(new InputStreamReader(is, charset)); 1717 encoding = charset; 1718 limit = savedLimit; 1719 1720 /* 1721 * Skip the optional BOM if we didn't above. This decrements limit 1722 * rather than incrementing position so that <?xml version='1.0'?> 1723 * is still at character 0. 1724 */ 1725 if (!detectCharset && peekCharacter() == 0xfeff) { 1726 limit--; 1727 System.arraycopy(buffer, 1, buffer, 0, limit); 1728 } 1729 } catch (Exception e) { 1730 throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e); 1731 } 1732 } 1733 1734 public void close() throws IOException { 1735 if (reader != null) { 1736 reader.close(); 1737 } 1738 } 1739 1740 public boolean getFeature(String feature) { 1741 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 1742 return processNsp; 1743 } else if (FEATURE_RELAXED.equals(feature)) { 1744 return relaxed; 1745 } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { 1746 return processDocDecl; 1747 } else { 1748 return false; 1749 } 1750 } 1751 1752 public String getInputEncoding() { 1753 return encoding; 1754 } 1755 1756 public void defineEntityReplacementText(String entity, String value) 1757 throws XmlPullParserException { 1758 if (processDocDecl) { 1759 throw new IllegalStateException( 1760 "Entity replacement text may not be defined with DOCTYPE processing enabled."); 1761 } 1762 if (reader == null) { 1763 throw new IllegalStateException( 1764 "Entity replacement text must be defined after setInput()"); 1765 } 1766 if (documentEntities == null) { 1767 documentEntities = new HashMap<String, char[]>(); 1768 } 1769 documentEntities.put(entity, value.toCharArray()); 1770 } 1771 1772 public Object getProperty(String property) { 1773 if (property.equals(PROPERTY_XMLDECL_VERSION)) { 1774 return version; 1775 } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { 1776 return standalone; 1777 } else if (property.equals(PROPERTY_LOCATION)) { 1778 return location != null ? location : reader.toString(); 1779 } else { 1780 return null; 1781 } 1782 } 1783 1784 /** 1785 * Returns the root element's name if it was declared in the DTD. This 1786 * equals the first tag's name for valid documents. 1787 */ 1788 public String getRootElementName() { 1789 return rootElementName; 1790 } 1791 1792 /** 1793 * Returns the document's system ID if it was declared. This is typically a 1794 * string like {@code http://www.w3.org/TR/html4/strict.dtd}. 1795 */ 1796 public String getSystemId() { 1797 return systemId; 1798 } 1799 1800 /** 1801 * Returns the document's public ID if it was declared. This is typically a 1802 * string like {@code -//W3C//DTD HTML 4.01//EN}. 1803 */ 1804 public String getPublicId() { 1805 return publicId; 1806 } 1807 1808 public int getNamespaceCount(int depth) { 1809 if (depth > this.depth) { 1810 throw new IndexOutOfBoundsException(); 1811 } 1812 return nspCounts[depth]; 1813 } 1814 1815 public String getNamespacePrefix(int pos) { 1816 return nspStack[pos * 2]; 1817 } 1818 1819 public String getNamespaceUri(int pos) { 1820 return nspStack[(pos * 2) + 1]; 1821 } 1822 1823 public String getNamespace(String prefix) { 1824 if ("xml".equals(prefix)) { 1825 return "http://www.w3.org/XML/1998/namespace"; 1826 } 1827 if ("xmlns".equals(prefix)) { 1828 return "http://www.w3.org/2000/xmlns/"; 1829 } 1830 1831 for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) { 1832 if (prefix == null) { 1833 if (nspStack[i] == null) { 1834 return nspStack[i + 1]; 1835 } 1836 } else if (prefix.equals(nspStack[i])) { 1837 return nspStack[i + 1]; 1838 } 1839 } 1840 return null; 1841 } 1842 1843 public int getDepth() { 1844 return depth; 1845 } 1846 1847 public String getPositionDescription() { 1848 StringBuilder buf = new StringBuilder(type < TYPES.length ? TYPES[type] : "unknown"); 1849 buf.append(' '); 1850 1851 if (type == START_TAG || type == END_TAG) { 1852 if (degenerated) { 1853 buf.append("(empty) "); 1854 } 1855 buf.append('<'); 1856 if (type == END_TAG) { 1857 buf.append('/'); 1858 } 1859 1860 if (prefix != null) { 1861 buf.append("{" + namespace + "}" + prefix + ":"); 1862 } 1863 buf.append(name); 1864 1865 int cnt = attributeCount * 4; 1866 for (int i = 0; i < cnt; i += 4) { 1867 buf.append(' '); 1868 if (attributes[i + 1] != null) { 1869 buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":"); 1870 } 1871 buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'"); 1872 } 1873 1874 buf.append('>'); 1875 } else if (type == IGNORABLE_WHITESPACE) { 1876 ; 1877 } else if (type != TEXT) { 1878 buf.append(getText()); 1879 } else if (isWhitespace) { 1880 buf.append("(whitespace)"); 1881 } else { 1882 String text = getText(); 1883 if (text.length() > 16) { 1884 text = text.substring(0, 16) + "..."; 1885 } 1886 buf.append(text); 1887 } 1888 1889 buf.append("@" + getLineNumber() + ":" + getColumnNumber()); 1890 if (location != null) { 1891 buf.append(" in "); 1892 buf.append(location); 1893 } else if (reader != null) { 1894 buf.append(" in "); 1895 buf.append(reader.toString()); 1896 } 1897 return buf.toString(); 1898 } 1899 1900 public int getLineNumber() { 1901 int result = bufferStartLine; 1902 for (int i = 0; i < position; i++) { 1903 if (buffer[i] == '\n') { 1904 result++; 1905 } 1906 } 1907 return result + 1; // the first line is '1' 1908 } 1909 1910 public int getColumnNumber() { 1911 int result = bufferStartColumn; 1912 for (int i = 0; i < position; i++) { 1913 if (buffer[i] == '\n') { 1914 result = 0; 1915 } else { 1916 result++; 1917 } 1918 } 1919 return result + 1; // the first column is '1' 1920 } 1921 1922 public boolean isWhitespace() throws XmlPullParserException { 1923 if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) { 1924 throw new XmlPullParserException(ILLEGAL_TYPE, this, null); 1925 } 1926 return isWhitespace; 1927 } 1928 1929 public String getText() { 1930 if (type < TEXT || (type == ENTITY_REF && unresolved)) { 1931 return null; 1932 } else if (text == null) { 1933 return ""; 1934 } else { 1935 return text; 1936 } 1937 } 1938 1939 public char[] getTextCharacters(int[] poslen) { 1940 String text = getText(); 1941 if (text == null) { 1942 poslen[0] = -1; 1943 poslen[1] = -1; 1944 return null; 1945 } 1946 char[] result = text.toCharArray(); 1947 poslen[0] = 0; 1948 poslen[1] = result.length; 1949 return result; 1950 } 1951 1952 public String getNamespace() { 1953 return namespace; 1954 } 1955 1956 public String getName() { 1957 return name; 1958 } 1959 1960 public String getPrefix() { 1961 return prefix; 1962 } 1963 1964 public boolean isEmptyElementTag() throws XmlPullParserException { 1965 if (type != START_TAG) { 1966 throw new XmlPullParserException(ILLEGAL_TYPE, this, null); 1967 } 1968 return degenerated; 1969 } 1970 1971 public int getAttributeCount() { 1972 return attributeCount; 1973 } 1974 1975 public String getAttributeType(int index) { 1976 return "CDATA"; 1977 } 1978 1979 public boolean isAttributeDefault(int index) { 1980 return false; 1981 } 1982 1983 public String getAttributeNamespace(int index) { 1984 if (index >= attributeCount) { 1985 throw new IndexOutOfBoundsException(); 1986 } 1987 return attributes[index * 4]; 1988 } 1989 1990 public String getAttributeName(int index) { 1991 if (index >= attributeCount) { 1992 throw new IndexOutOfBoundsException(); 1993 } 1994 return attributes[(index * 4) + 2]; 1995 } 1996 1997 public String getAttributePrefix(int index) { 1998 if (index >= attributeCount) { 1999 throw new IndexOutOfBoundsException(); 2000 } 2001 return attributes[(index * 4) + 1]; 2002 } 2003 2004 public String getAttributeValue(int index) { 2005 if (index >= attributeCount) { 2006 throw new IndexOutOfBoundsException(); 2007 } 2008 return attributes[(index * 4) + 3]; 2009 } 2010 2011 public String getAttributeValue(String namespace, String name) { 2012 for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) { 2013 if (attributes[i + 2].equals(name) 2014 && (namespace == null || attributes[i].equals(namespace))) { 2015 return attributes[i + 3]; 2016 } 2017 } 2018 2019 return null; 2020 } 2021 2022 public int getEventType() throws XmlPullParserException { 2023 return type; 2024 } 2025 2026 // utility methods to make XML parsing easier ... 2027 2028 public int nextTag() throws XmlPullParserException, IOException { 2029 next(); 2030 if (type == TEXT && isWhitespace) { 2031 next(); 2032 } 2033 2034 if (type != END_TAG && type != START_TAG) { 2035 throw new XmlPullParserException("unexpected type", this, null); 2036 } 2037 2038 return type; 2039 } 2040 2041 public void require(int type, String namespace, String name) 2042 throws XmlPullParserException, IOException { 2043 if (type != this.type 2044 || (namespace != null && !namespace.equals(getNamespace())) 2045 || (name != null && !name.equals(getName()))) { 2046 throw new XmlPullParserException( 2047 "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null); 2048 } 2049 } 2050 2051 public String nextText() throws XmlPullParserException, IOException { 2052 if (type != START_TAG) { 2053 throw new XmlPullParserException("precondition: START_TAG", this, null); 2054 } 2055 2056 next(); 2057 2058 String result; 2059 if (type == TEXT) { 2060 result = getText(); 2061 next(); 2062 } else { 2063 result = ""; 2064 } 2065 2066 if (type != END_TAG) { 2067 throw new XmlPullParserException("END_TAG expected", this, null); 2068 } 2069 2070 return result; 2071 } 2072 2073 public void setFeature(String feature, boolean value) throws XmlPullParserException { 2074 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 2075 processNsp = value; 2076 } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { 2077 processDocDecl = value; 2078 } else if (FEATURE_RELAXED.equals(feature)) { 2079 relaxed = value; 2080 } else { 2081 throw new XmlPullParserException("unsupported feature: " + feature, this, null); 2082 } 2083 } 2084 2085 public void setProperty(String property, Object value) throws XmlPullParserException { 2086 if (property.equals(PROPERTY_LOCATION)) { 2087 location = String.valueOf(value); 2088 } else { 2089 throw new XmlPullParserException("unsupported property: " + property); 2090 } 2091 } 2092 2093 /** 2094 * A chain of buffers containing XML content. Each content source contains 2095 * the parser's primary read buffer or the characters of entities actively 2096 * being parsed. 2097 * 2098 * <p>For example, note the buffers needed to parse this document: 2099 * <pre> {@code 2100 * <!DOCTYPE foo [ 2101 * <!ENTITY baz "ghi"> 2102 * <!ENTITY bar "def &baz; jkl"> 2103 * ]> 2104 * <foo>abc &bar; mno</foo> 2105 * }</pre> 2106 * 2107 * <p>Things get interesting when the bar entity is encountered. At that 2108 * point two buffers are active: 2109 * <ol> 2110 * <li>The value for the bar entity, containing {@code "def &baz; jkl"} 2111 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2112 * </ol> 2113 * <p>The parser will return the characters {@code "def "} from the bar 2114 * entity's buffer, and then it will encounter the baz entity. To handle 2115 * that, three buffers will be active: 2116 * <ol> 2117 * <li>The value for the baz entity, containing {@code "ghi"} 2118 * <li>The remaining value for the bar entity, containing {@code " jkl"} 2119 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2120 * </ol> 2121 * <p>The parser will then return the characters {@code ghi jkl mno} in that 2122 * sequence by reading each buffer in sequence. 2123 */ 2124 static class ContentSource { 2125 private final ContentSource next; 2126 private final char[] buffer; 2127 private final int position; 2128 private final int limit; 2129 ContentSource(ContentSource next, char[] buffer, int position, int limit) { 2130 this.next = next; 2131 this.buffer = buffer; 2132 this.position = position; 2133 this.limit = limit; 2134 } 2135 } 2136 2137 /** 2138 * Prepends the characters of {@code newBuffer} to be read before the 2139 * current buffer. 2140 */ 2141 private void pushContentSource(char[] newBuffer) { 2142 nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); 2143 buffer = newBuffer; 2144 position = 0; 2145 limit = newBuffer.length; 2146 } 2147 2148 /** 2149 * Replaces the current exhausted buffer with the next buffer in the chain. 2150 */ 2151 private void popContentSource() { 2152 buffer = nextContentSource.buffer; 2153 position = nextContentSource.position; 2154 limit = nextContentSource.limit; 2155 nextContentSource = nextContentSource.next; 2156 } 2157 } 2158