1 /* Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. */ 20 21 // Contributors: Paul Hackenberger (unterminated entity handling in relaxed mode) 22 23 package org.kxml2.io; 24 25 import java.io.Closeable; 26 import java.io.IOException; 27 import java.io.InputStream; 28 import java.io.InputStreamReader; 29 import java.io.Reader; 30 import java.util.HashMap; 31 import java.util.Map; 32 import libcore.internal.StringPool; 33 import org.xmlpull.v1.XmlPullParser; 34 import org.xmlpull.v1.XmlPullParserException; 35 36 /** 37 * An XML pull parser with limited support for parsing internal DTDs. 
38 */ 39 public class KXmlParser implements XmlPullParser, Closeable { 40 41 private static final String PROPERTY_XMLDECL_VERSION 42 = "http://xmlpull.org/v1/doc/properties.html#xmldecl-version"; 43 private static final String PROPERTY_XMLDECL_STANDALONE 44 = "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone"; 45 private static final String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location"; 46 private static final String FEATURE_RELAXED = "http://xmlpull.org/v1/doc/features.html#relaxed"; 47 48 private static final Map<String, String> DEFAULT_ENTITIES = new HashMap<String, String>(); 49 static { 50 DEFAULT_ENTITIES.put("lt", "<"); 51 DEFAULT_ENTITIES.put("gt", ">"); 52 DEFAULT_ENTITIES.put("amp", "&"); 53 DEFAULT_ENTITIES.put("apos", "'"); 54 DEFAULT_ENTITIES.put("quot", "\""); 55 } 56 57 private static final int ELEMENTDECL = 11; 58 private static final int ENTITYDECL = 12; 59 private static final int ATTLISTDECL = 13; 60 private static final int NOTATIONDECL = 14; 61 private static final int PARAMETER_ENTITY_REF = 15; 62 private static final char[] START_COMMENT = { '<', '!', '-', '-' }; 63 private static final char[] END_COMMENT = { '-', '-', '>' }; 64 private static final char[] COMMENT_DOUBLE_DASH = { '-', '-' }; 65 private static final char[] START_CDATA = { '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[' }; 66 private static final char[] END_CDATA = { ']', ']', '>' }; 67 private static final char[] START_PROCESSING_INSTRUCTION = { '<', '?' 
}; 68 private static final char[] END_PROCESSING_INSTRUCTION = { '?', '>' }; 69 private static final char[] START_DOCTYPE = { '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E' }; 70 private static final char[] SYSTEM = { 'S', 'Y', 'S', 'T', 'E', 'M' }; 71 private static final char[] PUBLIC = { 'P', 'U', 'B', 'L', 'I', 'C' }; 72 private static final char[] START_ELEMENT = { '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T' }; 73 private static final char[] START_ATTLIST = { '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T' }; 74 private static final char[] START_ENTITY = { '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y' }; 75 private static final char[] START_NOTATION = { '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' }; 76 private static final char[] EMPTY = new char[] { 'E', 'M', 'P', 'T', 'Y' }; 77 private static final char[] ANY = new char[]{ 'A', 'N', 'Y' }; 78 private static final char[] NDATA = new char[]{ 'N', 'D', 'A', 'T', 'A' }; 79 private static final char[] NOTATION = new char[]{ 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N' }; 80 private static final char[] REQUIRED = new char[] { 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D' }; 81 private static final char[] IMPLIED = new char[] { 'I', 'M', 'P', 'L', 'I', 'E', 'D' }; 82 private static final char[] FIXED = new char[] { 'F', 'I', 'X', 'E', 'D' }; 83 84 static final private String UNEXPECTED_EOF = "Unexpected EOF"; 85 static final private String ILLEGAL_TYPE = "Wrong event type"; 86 static final private int XML_DECLARATION = 998; 87 88 // general 89 private String location; 90 91 private String version; 92 private Boolean standalone; 93 private String rootElementName; 94 private String systemId; 95 private String publicId; 96 97 /** 98 * True if the {@code <!DOCTYPE>} contents are handled. The DTD defines 99 * entity values and default attribute values. These values are parsed at 100 * inclusion time and may contain both tags and entity references. 
101 * 102 * <p>If this is false, the user must {@link #defineEntityReplacementText 103 * define entity values manually}. Such entity values are literal strings 104 * and will not be parsed. There is no API to define default attributes 105 * manually. 106 */ 107 private boolean processDocDecl; 108 private boolean processNsp; 109 private boolean relaxed; 110 private boolean keepNamespaceAttributes; 111 112 /** 113 * If non-null, the contents of the read buffer must be copied into this 114 * string builder before the read buffer is overwritten. This is used to 115 * capture the raw DTD text while parsing the DTD. 116 */ 117 private StringBuilder bufferCapture; 118 119 /** 120 * Entities defined in or for this document. This map is created lazily. 121 */ 122 private Map<String, char[]> documentEntities; 123 124 /** 125 * Default attributes in this document. The outer map's key is the element 126 * name; the inner map's key is the attribute name. Both keys should be 127 * without namespace adjustments. This map is created lazily. 128 */ 129 private Map<String, Map<String, String>> defaultAttributes; 130 131 132 private int depth; 133 private String[] elementStack = new String[16]; 134 private String[] nspStack = new String[8]; 135 private int[] nspCounts = new int[4]; 136 137 // source 138 139 private Reader reader; 140 private String encoding; 141 private ContentSource nextContentSource; 142 private char[] buffer = new char[8192]; 143 private int position = 0; 144 private int limit = 0; 145 146 /* 147 * Track the number of newlines and columns preceding the current buffer. To 148 * compute the line and column of a position in the buffer, compute the line 149 * and column in the buffer and add the preceding values. 
150 */ 151 private int bufferStartLine; 152 private int bufferStartColumn; 153 154 // the current token 155 156 private int type; 157 private boolean isWhitespace; 158 private String namespace; 159 private String prefix; 160 private String name; 161 private String text; 162 163 private boolean degenerated; 164 private int attributeCount; 165 166 /* 167 * The current element's attributes arranged in groups of 4: 168 * i + 0 = attribute namespace URI 169 * i + 1 = attribute namespace prefix 170 * i + 2 = attribute qualified name (may contain ":", as in "html:h1") 171 * i + 3 = attribute value 172 */ 173 private String[] attributes = new String[16]; 174 175 private String error; 176 177 private boolean unresolved; 178 179 public final StringPool stringPool = new StringPool(); 180 181 /** 182 * Retains namespace attributes like {@code xmlns="http://foo"} or {@code xmlns:foo="http:foo"} 183 * in pulled elements. Most applications will only be interested in the effective namespaces of 184 * their elements, so these attributes aren't useful. But for structure preserving wrappers like 185 * DOM, it is necessary to keep the namespace data around. 
186 */ 187 public void keepNamespaceAttributes() { 188 this.keepNamespaceAttributes = true; 189 } 190 191 private boolean adjustNsp() throws XmlPullParserException { 192 boolean any = false; 193 194 for (int i = 0; i < attributeCount << 2; i += 4) { 195 String attrName = attributes[i + 2]; 196 int cut = attrName.indexOf(':'); 197 String prefix; 198 199 if (cut != -1) { 200 prefix = attrName.substring(0, cut); 201 attrName = attrName.substring(cut + 1); 202 } else if (attrName.equals("xmlns")) { 203 prefix = attrName; 204 attrName = null; 205 } else { 206 continue; 207 } 208 209 if (!prefix.equals("xmlns")) { 210 any = true; 211 } else { 212 int j = (nspCounts[depth]++) << 1; 213 214 nspStack = ensureCapacity(nspStack, j + 2); 215 nspStack[j] = attrName; 216 nspStack[j + 1] = attributes[i + 3]; 217 218 if (attrName != null && attributes[i + 3].isEmpty()) { 219 checkRelaxed("illegal empty namespace"); 220 } 221 222 if (keepNamespaceAttributes) { 223 // explicitly set the namespace for unprefixed attributes 224 // such as xmlns="http://foo" 225 attributes[i] = "http://www.w3.org/2000/xmlns/"; 226 any = true; 227 } else { 228 System.arraycopy( 229 attributes, 230 i + 4, 231 attributes, 232 i, 233 ((--attributeCount) << 2) - i); 234 235 i -= 4; 236 } 237 } 238 } 239 240 if (any) { 241 for (int i = (attributeCount << 2) - 4; i >= 0; i -= 4) { 242 243 String attrName = attributes[i + 2]; 244 int cut = attrName.indexOf(':'); 245 246 if (cut == 0 && !relaxed) { 247 throw new RuntimeException( 248 "illegal attribute name: " + attrName + " at " + this); 249 } else if (cut != -1) { 250 String attrPrefix = attrName.substring(0, cut); 251 252 attrName = attrName.substring(cut + 1); 253 254 String attrNs = getNamespace(attrPrefix); 255 256 if (attrNs == null && !relaxed) { 257 throw new RuntimeException( 258 "Undefined Prefix: " + attrPrefix + " in " + this); 259 } 260 261 attributes[i] = attrNs; 262 attributes[i + 1] = attrPrefix; 263 attributes[i + 2] = attrName; 264 } 265 } 
266 } 267 268 int cut = name.indexOf(':'); 269 270 if (cut == 0) { 271 checkRelaxed("illegal tag name: " + name); 272 } 273 274 if (cut != -1) { 275 prefix = name.substring(0, cut); 276 name = name.substring(cut + 1); 277 } 278 279 this.namespace = getNamespace(prefix); 280 281 if (this.namespace == null) { 282 if (prefix != null) { 283 checkRelaxed("undefined prefix: " + prefix); 284 } 285 this.namespace = NO_NAMESPACE; 286 } 287 288 return any; 289 } 290 291 private String[] ensureCapacity(String[] arr, int required) { 292 if (arr.length >= required) { 293 return arr; 294 } 295 String[] bigger = new String[required + 16]; 296 System.arraycopy(arr, 0, bigger, 0, arr.length); 297 return bigger; 298 } 299 300 private void checkRelaxed(String errorMessage) throws XmlPullParserException { 301 if (!relaxed) { 302 throw new XmlPullParserException(errorMessage, this, null); 303 } 304 if (error == null) { 305 error = "Error: " + errorMessage; 306 } 307 } 308 309 public int next() throws XmlPullParserException, IOException { 310 return next(false); 311 } 312 313 public int nextToken() throws XmlPullParserException, IOException { 314 return next(true); 315 } 316 317 private int next(boolean justOneToken) throws IOException, XmlPullParserException { 318 if (reader == null) { 319 throw new XmlPullParserException("setInput() must be called first.", this, null); 320 } 321 322 if (type == END_TAG) { 323 depth--; 324 } 325 326 // degenerated needs to be handled before error because of possible 327 // processor expectations(!) 
328 329 if (degenerated) { 330 degenerated = false; 331 type = END_TAG; 332 return type; 333 } 334 335 if (error != null) { 336 if (justOneToken) { 337 text = error; 338 type = COMMENT; 339 error = null; 340 return type; 341 } else { 342 error = null; 343 } 344 } 345 346 type = peekType(false); 347 348 if (type == XML_DECLARATION) { 349 readXmlDeclaration(); 350 type = peekType(false); 351 } 352 353 text = null; 354 isWhitespace = true; 355 prefix = null; 356 name = null; 357 namespace = null; 358 attributeCount = -1; 359 boolean throwOnResolveFailure = !justOneToken; 360 361 while (true) { 362 switch (type) { 363 364 /* 365 * Return immediately after encountering a start tag, end tag, or 366 * the end of the document. 367 */ 368 case START_TAG: 369 parseStartTag(false, throwOnResolveFailure); 370 return type; 371 case END_TAG: 372 readEndTag(); 373 return type; 374 case END_DOCUMENT: 375 return type; 376 377 /* 378 * Return after any text token when we're looking for a single 379 * token. Otherwise concatenate all text between tags. 380 */ 381 case ENTITY_REF: 382 if (justOneToken) { 383 StringBuilder entityTextBuilder = new StringBuilder(); 384 readEntity(entityTextBuilder, true, throwOnResolveFailure, ValueContext.TEXT); 385 text = entityTextBuilder.toString(); 386 break; 387 } 388 // fall-through 389 case TEXT: 390 text = readValue('<', !justOneToken, throwOnResolveFailure, ValueContext.TEXT); 391 if (depth == 0 && isWhitespace) { 392 type = IGNORABLE_WHITESPACE; 393 } 394 break; 395 case CDSECT: 396 read(START_CDATA); 397 text = readUntil(END_CDATA, true); 398 break; 399 400 /* 401 * Comments, processing instructions and declarations are returned 402 * when we're looking for a single token. Otherwise they're skipped. 
403 */ 404 case COMMENT: 405 String commentText = readComment(justOneToken); 406 if (justOneToken) { 407 text = commentText; 408 } 409 break; 410 case PROCESSING_INSTRUCTION: 411 read(START_PROCESSING_INSTRUCTION); 412 String processingInstruction = readUntil(END_PROCESSING_INSTRUCTION, justOneToken); 413 if (justOneToken) { 414 text = processingInstruction; 415 } 416 break; 417 case DOCDECL: 418 readDoctype(justOneToken); 419 break; 420 421 default: 422 throw new XmlPullParserException("Unexpected token", this, null); 423 } 424 425 if (depth == 0 && (type == ENTITY_REF || type == TEXT || type == CDSECT)) { 426 throw new XmlPullParserException("Unexpected token", this, null); 427 } 428 429 if (justOneToken) { 430 return type; 431 } 432 433 if (type == IGNORABLE_WHITESPACE) { 434 text = null; 435 } 436 437 /* 438 * We've read all that we can of a non-empty text block. Always 439 * report this as text, even if it was a CDATA block or entity 440 * reference. 441 */ 442 int peek = peekType(false); 443 if (text != null && !text.isEmpty() && peek < TEXT) { 444 type = TEXT; 445 return type; 446 } 447 448 type = peek; 449 } 450 } 451 452 /** 453 * Reads text until the specified delimiter is encountered. Consumes the 454 * text and the delimiter. 455 * 456 * @param returnText true to return the read text excluding the delimiter; 457 * false to return null. 
458 */ 459 private String readUntil(char[] delimiter, boolean returnText) 460 throws IOException, XmlPullParserException { 461 int start = position; 462 StringBuilder result = null; 463 464 if (returnText && text != null) { 465 result = new StringBuilder(); 466 result.append(text); 467 } 468 469 search: 470 while (true) { 471 if (position + delimiter.length > limit) { 472 if (start < position && returnText) { 473 if (result == null) { 474 result = new StringBuilder(); 475 } 476 result.append(buffer, start, position - start); 477 } 478 if (!fillBuffer(delimiter.length)) { 479 checkRelaxed(UNEXPECTED_EOF); 480 type = COMMENT; 481 return null; 482 } 483 start = position; 484 } 485 486 // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length) 487 // when the VM has better method inlining 488 for (int i = 0; i < delimiter.length; i++) { 489 if (buffer[position + i] != delimiter[i]) { 490 position++; 491 continue search; 492 } 493 } 494 495 break; 496 } 497 498 int end = position; 499 position += delimiter.length; 500 501 if (!returnText) { 502 return null; 503 } else if (result == null) { 504 return stringPool.get(buffer, start, end - start); 505 } else { 506 result.append(buffer, start, end - start); 507 return result.toString(); 508 } 509 } 510 511 /** 512 * Returns true if an XML declaration was read. 
513 */ 514 private void readXmlDeclaration() throws IOException, XmlPullParserException { 515 if (bufferStartLine != 0 || bufferStartColumn != 0 || position != 0) { 516 checkRelaxed("processing instructions must not start with xml"); 517 } 518 519 read(START_PROCESSING_INSTRUCTION); 520 parseStartTag(true, true); 521 522 if (attributeCount < 1 || !"version".equals(attributes[2])) { 523 checkRelaxed("version expected"); 524 } 525 526 version = attributes[3]; 527 528 int pos = 1; 529 530 if (pos < attributeCount && "encoding".equals(attributes[2 + 4])) { 531 encoding = attributes[3 + 4]; 532 pos++; 533 } 534 535 if (pos < attributeCount && "standalone".equals(attributes[4 * pos + 2])) { 536 String st = attributes[3 + 4 * pos]; 537 if ("yes".equals(st)) { 538 standalone = Boolean.TRUE; 539 } else if ("no".equals(st)) { 540 standalone = Boolean.FALSE; 541 } else { 542 checkRelaxed("illegal standalone value: " + st); 543 } 544 pos++; 545 } 546 547 if (pos != attributeCount) { 548 checkRelaxed("unexpected attributes in XML declaration"); 549 } 550 551 isWhitespace = true; 552 text = null; 553 } 554 555 private String readComment(boolean returnText) throws IOException, XmlPullParserException { 556 read(START_COMMENT); 557 558 if (relaxed) { 559 return readUntil(END_COMMENT, returnText); 560 } 561 562 String commentText = readUntil(COMMENT_DOUBLE_DASH, returnText); 563 if (peekCharacter() != '>') { 564 throw new XmlPullParserException("Comments may not contain --", this, null); 565 } 566 position++; 567 return commentText; 568 } 569 570 /** 571 * Read the document's DTD. Although this parser is non-validating, the DTD 572 * must be parsed to capture entity values and default attribute values. 
573 */ 574 private void readDoctype(boolean saveDtdText) throws IOException, XmlPullParserException { 575 read(START_DOCTYPE); 576 577 int startPosition = -1; 578 if (saveDtdText) { 579 bufferCapture = new StringBuilder(); 580 startPosition = position; 581 } 582 try { 583 skip(); 584 rootElementName = readName(); 585 readExternalId(true, true); 586 skip(); 587 if (peekCharacter() == '[') { 588 readInternalSubset(); 589 } 590 skip(); 591 } finally { 592 if (saveDtdText) { 593 bufferCapture.append(buffer, 0, position); 594 bufferCapture.delete(0, startPosition); 595 text = bufferCapture.toString(); 596 bufferCapture = null; 597 } 598 } 599 600 read('>'); 601 } 602 603 /** 604 * Reads an external ID of one of these two forms: 605 * SYSTEM "quoted system name" 606 * PUBLIC "quoted public id" "quoted system name" 607 * 608 * If the system name is not required, this also supports lone public IDs of 609 * this form: 610 * PUBLIC "quoted public id" 611 * 612 * Returns true if any ID was read. 613 */ 614 private boolean readExternalId(boolean requireSystemName, boolean assignFields) 615 throws IOException, XmlPullParserException { 616 skip(); 617 int c = peekCharacter(); 618 619 if (c == 'S') { 620 read(SYSTEM); 621 } else if (c == 'P') { 622 read(PUBLIC); 623 skip(); 624 if (assignFields) { 625 publicId = readQuotedId(true); 626 } else { 627 readQuotedId(false); 628 } 629 } else { 630 return false; 631 } 632 633 skip(); 634 635 if (!requireSystemName) { 636 int delimiter = peekCharacter(); 637 if (delimiter != '"' && delimiter != '\'') { 638 return true; // no system name! 639 } 640 } 641 642 if (assignFields) { 643 systemId = readQuotedId(true); 644 } else { 645 readQuotedId(false); 646 } 647 return true; 648 } 649 650 private static final char[] SINGLE_QUOTE = new char[] { '\'' }; 651 private static final char[] DOUBLE_QUOTE = new char[] { '"' }; 652 653 /** 654 * Reads a quoted string, performing no entity escaping of the contents. 
655 */ 656 private String readQuotedId(boolean returnText) throws IOException, XmlPullParserException { 657 int quote = peekCharacter(); 658 char[] delimiter; 659 if (quote == '"') { 660 delimiter = DOUBLE_QUOTE; 661 } else if (quote == '\'') { 662 delimiter = SINGLE_QUOTE; 663 } else { 664 throw new XmlPullParserException("Expected a quoted string", this, null); 665 } 666 position++; 667 return readUntil(delimiter, returnText); 668 } 669 670 private void readInternalSubset() throws IOException, XmlPullParserException { 671 read('['); 672 673 while (true) { 674 skip(); 675 if (peekCharacter() == ']') { 676 position++; 677 return; 678 } 679 680 int declarationType = peekType(true); 681 switch (declarationType) { 682 case ELEMENTDECL: 683 readElementDeclaration(); 684 break; 685 686 case ATTLISTDECL: 687 readAttributeListDeclaration(); 688 break; 689 690 case ENTITYDECL: 691 readEntityDeclaration(); 692 break; 693 694 case NOTATIONDECL: 695 readNotationDeclaration(); 696 break; 697 698 case PROCESSING_INSTRUCTION: 699 read(START_PROCESSING_INSTRUCTION); 700 readUntil(END_PROCESSING_INSTRUCTION, false); 701 break; 702 703 case COMMENT: 704 readComment(false); 705 break; 706 707 case PARAMETER_ENTITY_REF: 708 throw new XmlPullParserException( 709 "Parameter entity references are not supported", this, null); 710 711 default: 712 throw new XmlPullParserException("Unexpected token", this, null); 713 } 714 } 715 } 716 717 /** 718 * Read an element declaration. This contains a name and a content spec. 719 * <!ELEMENT foo EMPTY > 720 * <!ELEMENT foo (bar?,(baz|quux)) > 721 * <!ELEMENT foo (#PCDATA|bar)* > 722 */ 723 private void readElementDeclaration() throws IOException, XmlPullParserException { 724 read(START_ELEMENT); 725 skip(); 726 readName(); 727 readContentSpec(); 728 skip(); 729 read('>'); 730 } 731 732 /** 733 * Read an element content spec. This is a regular expression-like pattern 734 * of names or other content specs. 
The following operators are supported: 735 * sequence: (a,b,c) 736 * choice: (a|b|c) 737 * optional: a? 738 * one or more: a+ 739 * any number: a* 740 * 741 * The special name '#PCDATA' is permitted but only if it is the first 742 * element of the first group: 743 * (#PCDATA|a|b) 744 * 745 * The top-level element must be either a choice, a sequence, or one of the 746 * special names EMPTY and ANY. 747 */ 748 private void readContentSpec() throws IOException, XmlPullParserException { 749 // this implementation is very lenient; it scans for balanced parens only 750 skip(); 751 int c = peekCharacter(); 752 if (c == '(') { 753 int depth = 0; 754 do { 755 if (c == '(') { 756 depth++; 757 } else if (c == ')') { 758 depth--; 759 } else if (c == -1) { 760 throw new XmlPullParserException( 761 "Unterminated element content spec", this, null); 762 } 763 position++; 764 c = peekCharacter(); 765 } while (depth > 0); 766 767 if (c == '*' || c == '?' || c == '+') { 768 position++; 769 } 770 } else if (c == EMPTY[0]) { 771 read(EMPTY); 772 } else if (c == ANY[0]) { 773 read(ANY); 774 } else { 775 throw new XmlPullParserException("Expected element content spec", this, null); 776 } 777 } 778 779 /** 780 * Reads an attribute list declaration such as the following: 781 * <!ATTLIST foo 782 * bar CDATA #IMPLIED 783 * quux (a|b|c) "c" 784 * baz NOTATION (a|b|c) #FIXED "c"> 785 * 786 * Each attribute has a name, type and default. 787 * 788 * Types are one of the built-in types (CDATA, ID, IDREF, IDREFS, ENTITY, 789 * ENTITIES, NMTOKEN, or NMTOKENS), an enumerated type "(list|of|options)" 790 * or NOTATION followed by an enumerated type. 791 * 792 * The default is either #REQUIRED, #IMPLIED, #FIXED, a quoted value, or 793 * #FIXED with a quoted value. 
794 */ 795 private void readAttributeListDeclaration() throws IOException, XmlPullParserException { 796 read(START_ATTLIST); 797 skip(); 798 String elementName = readName(); 799 800 while (true) { 801 skip(); 802 int c = peekCharacter(); 803 if (c == '>') { 804 position++; 805 return; 806 } 807 808 // attribute name 809 String attributeName = readName(); 810 811 // attribute type 812 skip(); 813 if (position + 1 >= limit && !fillBuffer(2)) { 814 throw new XmlPullParserException("Malformed attribute list", this, null); 815 } 816 if (buffer[position] == NOTATION[0] && buffer[position + 1] == NOTATION[1]) { 817 read(NOTATION); 818 skip(); 819 } 820 c = peekCharacter(); 821 if (c == '(') { 822 position++; 823 while (true) { 824 skip(); 825 readName(); 826 skip(); 827 c = peekCharacter(); 828 if (c == ')') { 829 position++; 830 break; 831 } else if (c == '|') { 832 position++; 833 } else { 834 throw new XmlPullParserException("Malformed attribute type", this, null); 835 } 836 } 837 } else { 838 readName(); 839 } 840 841 // default value 842 skip(); 843 c = peekCharacter(); 844 if (c == '#') { 845 position++; 846 c = peekCharacter(); 847 if (c == 'R') { 848 read(REQUIRED); 849 } else if (c == 'I') { 850 read(IMPLIED); 851 } else if (c == 'F') { 852 read(FIXED); 853 } else { 854 throw new XmlPullParserException("Malformed attribute type", this, null); 855 } 856 skip(); 857 c = peekCharacter(); 858 } 859 if (c == '"' || c == '\'') { 860 position++; 861 // TODO: does this do escaping correctly? 
862 String value = readValue((char) c, true, true, ValueContext.ATTRIBUTE); 863 if (peekCharacter() == c) { 864 position++; 865 } 866 defineAttributeDefault(elementName, attributeName, value); 867 } 868 } 869 } 870 871 private void defineAttributeDefault(String elementName, String attributeName, String value) { 872 if (defaultAttributes == null) { 873 defaultAttributes = new HashMap<String, Map<String, String>>(); 874 } 875 Map<String, String> elementAttributes = defaultAttributes.get(elementName); 876 if (elementAttributes == null) { 877 elementAttributes = new HashMap<String, String>(); 878 defaultAttributes.put(elementName, elementAttributes); 879 } 880 elementAttributes.put(attributeName, value); 881 } 882 883 /** 884 * Read an entity declaration. The value of internal entities are inline: 885 * <!ENTITY foo "bar"> 886 * 887 * The values of external entities must be retrieved by URL or path: 888 * <!ENTITY foo SYSTEM "http://host/file"> 889 * <!ENTITY foo PUBLIC "-//Android//Foo//EN" "http://host/file"> 890 * <!ENTITY foo SYSTEM "../file.png" NDATA png> 891 * 892 * Entities may be general or parameterized. Parameterized entities are 893 * marked by a percent sign. Such entities may only be used in the DTD: 894 * <!ENTITY % foo "bar"> 895 */ 896 private void readEntityDeclaration() throws IOException, XmlPullParserException { 897 read(START_ENTITY); 898 boolean generalEntity = true; 899 900 skip(); 901 if (peekCharacter() == '%') { 902 generalEntity = false; 903 position++; 904 skip(); 905 } 906 907 String name = readName(); 908 909 skip(); 910 int quote = peekCharacter(); 911 String entityValue; 912 if (quote == '"' || quote == '\'') { 913 position++; 914 entityValue = readValue((char) quote, true, false, ValueContext.ENTITY_DECLARATION); 915 if (peekCharacter() == quote) { 916 position++; 917 } 918 } else if (readExternalId(true, false)) { 919 /* 920 * Map external entities to the empty string. 
This is dishonest, 921 * but it's consistent with Android's Expat pull parser. 922 */ 923 entityValue = ""; 924 skip(); 925 if (peekCharacter() == NDATA[0]) { 926 read(NDATA); 927 skip(); 928 readName(); 929 } 930 } else { 931 throw new XmlPullParserException("Expected entity value or external ID", this, null); 932 } 933 934 if (generalEntity && processDocDecl) { 935 if (documentEntities == null) { 936 documentEntities = new HashMap<String, char[]>(); 937 } 938 documentEntities.put(name, entityValue.toCharArray()); 939 } 940 941 skip(); 942 read('>'); 943 } 944 945 private void readNotationDeclaration() throws IOException, XmlPullParserException { 946 read(START_NOTATION); 947 skip(); 948 readName(); 949 if (!readExternalId(false, false)) { 950 throw new XmlPullParserException( 951 "Expected external ID or public ID for notation", this, null); 952 } 953 skip(); 954 read('>'); 955 } 956 957 private void readEndTag() throws IOException, XmlPullParserException { 958 read('<'); 959 read('/'); 960 name = readName(); // TODO: pass the expected name in as a hint? 961 skip(); 962 read('>'); 963 964 int sp = (depth - 1) * 4; 965 966 if (depth == 0) { 967 checkRelaxed("read end tag " + name + " with no tags open"); 968 type = COMMENT; 969 return; 970 } 971 972 if (name.equals(elementStack[sp + 3])) { 973 namespace = elementStack[sp]; 974 prefix = elementStack[sp + 1]; 975 name = elementStack[sp + 2]; 976 } else if (!relaxed) { 977 throw new XmlPullParserException( 978 "expected: /" + elementStack[sp + 3] + " read: " + name, this, null); 979 } 980 } 981 982 /** 983 * Returns the type of the next token. 
984 */ 985 private int peekType(boolean inDeclaration) throws IOException, XmlPullParserException { 986 if (position >= limit && !fillBuffer(1)) { 987 return END_DOCUMENT; 988 } 989 990 switch (buffer[position]) { 991 case '&': 992 return ENTITY_REF; // & 993 case '<': 994 if (position + 3 >= limit && !fillBuffer(4)) { 995 throw new XmlPullParserException("Dangling <", this, null); 996 } 997 998 switch (buffer[position + 1]) { 999 case '/': 1000 return END_TAG; // </ 1001 case '?': 1002 // we're looking for "<?xml " with case insensitivity 1003 if ((position + 5 < limit || fillBuffer(6)) 1004 && (buffer[position + 2] == 'x' || buffer[position + 2] == 'X') 1005 && (buffer[position + 3] == 'm' || buffer[position + 3] == 'M') 1006 && (buffer[position + 4] == 'l' || buffer[position + 4] == 'L') 1007 && (buffer[position + 5] == ' ')) { 1008 return XML_DECLARATION; // <?xml 1009 } else { 1010 return PROCESSING_INSTRUCTION; // <? 1011 } 1012 case '!': 1013 switch (buffer[position + 2]) { 1014 case 'D': 1015 return DOCDECL; // <!D 1016 case '[': 1017 return CDSECT; // <![ 1018 case '-': 1019 return COMMENT; // <!- 1020 case 'E': 1021 switch (buffer[position + 3]) { 1022 case 'L': 1023 return ELEMENTDECL; // <!EL 1024 case 'N': 1025 return ENTITYDECL; // <!EN 1026 } 1027 break; 1028 case 'A': 1029 return ATTLISTDECL; // <!A 1030 case 'N': 1031 return NOTATIONDECL; // <!N 1032 } 1033 throw new XmlPullParserException("Unexpected <!", this, null); 1034 default: 1035 return START_TAG; // < 1036 } 1037 case '%': 1038 return inDeclaration ? 
PARAMETER_ENTITY_REF : TEXT; 1039 default: 1040 return TEXT; 1041 } 1042 } 1043 1044 /** 1045 * Sets name and attributes 1046 */ 1047 private void parseStartTag(boolean xmldecl, boolean throwOnResolveFailure) 1048 throws IOException, XmlPullParserException { 1049 if (!xmldecl) { 1050 read('<'); 1051 } 1052 name = readName(); 1053 attributeCount = 0; 1054 1055 while (true) { 1056 skip(); 1057 1058 if (position >= limit && !fillBuffer(1)) { 1059 checkRelaxed(UNEXPECTED_EOF); 1060 return; 1061 } 1062 1063 int c = buffer[position]; 1064 1065 if (xmldecl) { 1066 if (c == '?') { 1067 position++; 1068 read('>'); 1069 return; 1070 } 1071 } else { 1072 if (c == '/') { 1073 degenerated = true; 1074 position++; 1075 skip(); 1076 read('>'); 1077 break; 1078 } else if (c == '>') { 1079 position++; 1080 break; 1081 } 1082 } 1083 1084 String attrName = readName(); 1085 1086 int i = (attributeCount++) * 4; 1087 attributes = ensureCapacity(attributes, i + 4); 1088 attributes[i] = ""; 1089 attributes[i + 1] = null; 1090 attributes[i + 2] = attrName; 1091 1092 skip(); 1093 if (position >= limit && !fillBuffer(1)) { 1094 checkRelaxed(UNEXPECTED_EOF); 1095 return; 1096 } 1097 1098 if (buffer[position] == '=') { 1099 position++; 1100 1101 skip(); 1102 if (position >= limit && !fillBuffer(1)) { 1103 checkRelaxed(UNEXPECTED_EOF); 1104 return; 1105 } 1106 char delimiter = buffer[position]; 1107 1108 if (delimiter == '\'' || delimiter == '"') { 1109 position++; 1110 } else if (relaxed) { 1111 delimiter = ' '; 1112 } else { 1113 throw new XmlPullParserException("attr value delimiter missing!", this, null); 1114 } 1115 1116 attributes[i + 3] = readValue(delimiter, true, throwOnResolveFailure, 1117 ValueContext.ATTRIBUTE); 1118 1119 if (delimiter != ' ' && peekCharacter() == delimiter) { 1120 position++; // end quote 1121 } 1122 } else if (relaxed) { 1123 attributes[i + 3] = attrName; 1124 } else { 1125 checkRelaxed("Attr.value missing f. 
" + attrName); 1126 attributes[i + 3] = attrName; 1127 } 1128 } 1129 1130 int sp = depth++ * 4; 1131 elementStack = ensureCapacity(elementStack, sp + 4); 1132 elementStack[sp + 3] = name; 1133 1134 if (depth >= nspCounts.length) { 1135 int[] bigger = new int[depth + 4]; 1136 System.arraycopy(nspCounts, 0, bigger, 0, nspCounts.length); 1137 nspCounts = bigger; 1138 } 1139 1140 nspCounts[depth] = nspCounts[depth - 1]; 1141 1142 if (processNsp) { 1143 adjustNsp(); 1144 } else { 1145 namespace = ""; 1146 } 1147 1148 // For consistency with Expat, add default attributes after fixing namespaces. 1149 if (defaultAttributes != null) { 1150 Map<String, String> elementDefaultAttributes = defaultAttributes.get(name); 1151 if (elementDefaultAttributes != null) { 1152 for (Map.Entry<String, String> entry : elementDefaultAttributes.entrySet()) { 1153 if (getAttributeValue(null, entry.getKey()) != null) { 1154 continue; // an explicit value overrides the default 1155 } 1156 1157 int i = (attributeCount++) * 4; 1158 attributes = ensureCapacity(attributes, i + 4); 1159 attributes[i] = ""; 1160 attributes[i + 1] = null; 1161 attributes[i + 2] = entry.getKey(); 1162 attributes[i + 3] = entry.getValue(); 1163 } 1164 } 1165 } 1166 1167 elementStack[sp] = namespace; 1168 elementStack[sp + 1] = prefix; 1169 elementStack[sp + 2] = name; 1170 } 1171 1172 /** 1173 * Reads an entity reference from the buffer, resolves it, and writes the 1174 * resolved entity to {@code out}. If the entity cannot be read or resolved, 1175 * {@code out} will contain the partial entity reference. 
1176 */ 1177 private void readEntity(StringBuilder out, boolean isEntityToken, boolean throwOnResolveFailure, 1178 ValueContext valueContext) throws IOException, XmlPullParserException { 1179 int start = out.length(); 1180 1181 if (buffer[position++] != '&') { 1182 throw new AssertionError(); 1183 } 1184 1185 out.append('&'); 1186 1187 while (true) { 1188 int c = peekCharacter(); 1189 1190 if (c == ';') { 1191 out.append(';'); 1192 position++; 1193 break; 1194 1195 } else if (c >= 128 1196 || (c >= '0' && c <= '9') 1197 || (c >= 'a' && c <= 'z') 1198 || (c >= 'A' && c <= 'Z') 1199 || c == '_' 1200 || c == '-' 1201 || c == '#') { 1202 position++; 1203 out.append((char) c); 1204 1205 } else if (relaxed) { 1206 // intentionally leave the partial reference in 'out' 1207 return; 1208 1209 } else { 1210 throw new XmlPullParserException("unterminated entity ref", this, null); 1211 } 1212 } 1213 1214 String code = out.substring(start + 1, out.length() - 1); 1215 1216 if (isEntityToken) { 1217 name = code; 1218 } 1219 1220 if (code.startsWith("#")) { 1221 try { 1222 int c = code.startsWith("#x") 1223 ? 
Integer.parseInt(code.substring(2), 16) 1224 : Integer.parseInt(code.substring(1)); 1225 out.delete(start, out.length()); 1226 out.appendCodePoint(c); 1227 unresolved = false; 1228 return; 1229 } catch (NumberFormatException notANumber) { 1230 throw new XmlPullParserException("Invalid character reference: &" + code); 1231 } catch (IllegalArgumentException invalidCodePoint) { 1232 throw new XmlPullParserException("Invalid character reference: &" + code); 1233 } 1234 } 1235 1236 if (valueContext == ValueContext.ENTITY_DECLARATION) { 1237 // keep the unresolved &code; in the text to resolve later 1238 return; 1239 } 1240 1241 String defaultEntity = DEFAULT_ENTITIES.get(code); 1242 if (defaultEntity != null) { 1243 out.delete(start, out.length()); 1244 unresolved = false; 1245 out.append(defaultEntity); 1246 return; 1247 } 1248 1249 char[] resolved; 1250 if (documentEntities != null && (resolved = documentEntities.get(code)) != null) { 1251 out.delete(start, out.length()); 1252 unresolved = false; 1253 if (processDocDecl) { 1254 pushContentSource(resolved); // parse the entity as XML 1255 } else { 1256 out.append(resolved); // include the entity value as text 1257 } 1258 return; 1259 } 1260 1261 /* 1262 * The parser skipped an external DTD, and now we've encountered an 1263 * unknown entity that could have been declared there. Map it to the 1264 * empty string. This is dishonest, but it's consistent with Android's 1265 * old ExpatPullParser. 1266 */ 1267 if (systemId != null) { 1268 out.delete(start, out.length()); 1269 return; 1270 } 1271 1272 // keep the unresolved entity "&code;" in the text for relaxed clients 1273 unresolved = true; 1274 if (throwOnResolveFailure) { 1275 checkRelaxed("unresolved: &" + code + ";"); 1276 } 1277 } 1278 1279 /** 1280 * Where a value is found impacts how that value is interpreted. For 1281 * example, in attributes, "\n" must be replaced with a space character. In 1282 * text, "]]>" is forbidden. 
In entity declarations, named references are
     * not resolved.
     */
    enum ValueContext {
        ATTRIBUTE,
        TEXT,
        ENTITY_DECLARATION
    }

    /**
     * Returns the current text or attribute value. This also has the side
     * effect of setting isWhitespace to false if a non-whitespace character is
     * encountered.
     *
     * <p>On return, {@code position} is at the terminating character, which is
     * not consumed. If the input ends first, whatever was accumulated so far
     * (possibly the empty string) is returned.
     *
     * @param delimiter {@code <} for text, {@code "} and {@code '} for quoted
     *     attributes, or a space for unquoted attributes.
     * @param resolveEntities when false, reading stops at the first
     *     {@code '&'} instead of resolving it.
     * @param throwOnResolveFailure forwarded to {@link #readEntity}.
     * @param valueContext selects the newline, {@code "]]>"} and {@code '%'}
     *     handling described below.
     */
    private String readValue(char delimiter, boolean resolveEntities, boolean throwOnResolveFailure,
            ValueContext valueContext) throws IOException, XmlPullParserException {

        /*
         * This method returns all of the characters from the current position
         * through to an appropriate delimiter.
         *
         * If we're lucky (which we usually are), we'll return a single slice of
         * the buffer. This fast path avoids allocating a string builder.
         *
         * There are 6 unlucky characters we could encounter:
         *  - "&":  entities must be resolved.
         *  - "%":  parameter entities are unsupported in entity values.
         *  - "<":  this isn't permitted in attributes unless relaxed.
         *  - "]":  this requires a lookahead to defend against the forbidden
         *          CDATA section delimiter "]]>".
         *  - "\r": If a "\r" is followed by a "\n", we discard the "\r". If it
         *          isn't followed by "\n", we replace "\r" with either a "\n"
         *          in text nodes or a space in attribute values.
         *  - "\n": In attribute values, "\n" must be replaced with a space.
         *
         * We could also get unlucky by needing to refill the buffer midway
         * through the text.
         */

        // 'start' marks the first buffered character not yet copied to 'result'.
        int start = position;
        StringBuilder result = null;

        // if a text section was already started, prefix the start
        if (valueContext == ValueContext.TEXT && text != null) {
            result = new StringBuilder();
            result.append(text);
        }

        while (true) {

            /*
             * Make sure we have at least a single character to read from the
             * buffer. This mutates the buffer, so save the partial result
             * to the slow path string builder first.
             */
            if (position >= limit) {
                if (start < position) {
                    if (result == null) {
                        result = new StringBuilder();
                    }
                    result.append(buffer, start, position - start);
                }
                if (!fillBuffer(1)) {
                    return result != null ? result.toString() : "";
                }
                start = position;
            }

            char c = buffer[position];

            // Terminators: the delimiter itself; whitespace or '>' for an
            // unquoted attribute; '&' when entities are not being resolved.
            if (c == delimiter
                    || (delimiter == ' ' && (c <= ' ' || c == '>'))
                    || c == '&' && !resolveEntities) {
                break;
            }

            // Fast path: an ordinary character stays in the buffer slice.
            if (c != '\r'
                    && (c != '\n' || valueContext != ValueContext.ATTRIBUTE)
                    && c != '&'
                    && c != '<'
                    && (c != ']' || valueContext != ValueContext.TEXT)
                    && (c != '%' || valueContext != ValueContext.ENTITY_DECLARATION)) {
                isWhitespace &= (c <= ' ');
                position++;
                continue;
            }

            /*
             * We've encountered an unlucky character! Convert from fast
             * path to slow path if we haven't done so already.
             */
            if (result == null) {
                result = new StringBuilder();
            }
            result.append(buffer, start, position - start);

            if (c == '\r') {
                // Swallow the '\n' of a "\r\n" pair, then emit one replacement.
                if ((position + 1 < limit || fillBuffer(2)) && buffer[position + 1] == '\n') {
                    position++;
                }
                c = (valueContext == ValueContext.ATTRIBUTE) ? ' ' : '\n';

            } else if (c == '\n') {
                c = ' ';

            } else if (c == '&') {
                isWhitespace = false; // TODO: what if the entity resolves to whitespace?
                readEntity(result, false, throwOnResolveFailure, valueContext);
                start = position;
                continue;

            } else if (c == '<') {
                if (valueContext == ValueContext.ATTRIBUTE) {
                    checkRelaxed("Illegal: \"<\" inside attribute value");
                }
                isWhitespace = false;

            } else if (c == ']') {
                if ((position + 2 < limit || fillBuffer(3))
                        && buffer[position + 1] == ']' && buffer[position + 2] == '>') {
                    checkRelaxed("Illegal: \"]]>\" outside CDATA section");
                }
                isWhitespace = false;

            } else if (c == '%') {
                throw new XmlPullParserException("This parser doesn't support parameter entities",
                        this, null);

            } else {
                throw new AssertionError();
            }

            // Consume the special character and emit its (possibly replaced) value.
            position++;
            result.append(c);
            start = position;
        }

        if (result == null) {
            return stringPool.get(buffer, start, position - start);
        } else {
            result.append(buffer, start, position - start);
            return result.toString();
        }
    }

    /**
     * Consumes one character that is expected to be {@code expected}. A
     * mismatch is reported through {@code checkRelaxed}; if that call returns,
     * the mismatching character is still consumed, except at end of input.
     */
    private void read(char expected) throws IOException, XmlPullParserException {
        int c = peekCharacter();
        if (c != expected) {
            // NOTE(review): checkRelaxed is defined outside this view; it
            // presumably throws unless the parser is relaxed - confirm.
            checkRelaxed("expected: '" + expected + "' actual: '" + ((char) c) + "'");
            if (c == -1) {
                return; // On EOF, don't move position beyond limit
            }
        }
        position++;
    }

    /**
     * Consumes {@code chars.length} characters that are expected to equal
     * {@code chars}. Mismatches and a premature end of input are reported
     * through {@code checkRelaxed}; at end of input nothing is consumed.
     */
    private void read(char[] chars) throws IOException, XmlPullParserException {
        if (position + chars.length > limit && !fillBuffer(chars.length)) {
            checkRelaxed("expected: '" + new String(chars) + "' but was EOF");
            return;
        }

        // TODO: replace with Arrays.equals(buffer, position, delimiter, 0, delimiter.length)
        // when the VM has better method inlining
        for (int i = 0; i < chars.length; i++) {
            if (buffer[position + i] != chars[i]) {
                checkRelaxed("expected: \"" + new String(chars) + "\" but was \""
                        + new String(buffer, position, chars.length) + "...\"");
            }
        }

        position += chars.length;
    }

    /**
     * Returns the next character without consuming it, refilling the buffer
     * if necessary, or -1 if no more input is available.
     */
    private int peekCharacter() throws IOException, XmlPullParserException {
        if (position < limit || fillBuffer(1)) {
            return buffer[position];
        }
        return -1;
    }

    /**
     * Returns true once {@code limit - position >= minimum}. If the data is
     * exhausted before that many characters are available, this returns
     * false.
     *
     * <p>Pending content sources are popped first. Otherwise the unread
     * characters are moved to the front of the buffer, so {@code position} is
     * 0 afterwards and callers must not keep indices from before the call.
     */
    private boolean fillBuffer(int minimum) throws IOException, XmlPullParserException {
        // If we've exhausted the current content source, remove it
        while (nextContentSource != null) {
            if (position < limit) {
                throw new XmlPullParserException("Unbalanced entity!", this, null);
            }
            popContentSource();
            if (limit - position >= minimum) {
                return true;
            }
        }

        // Before clobbering the old characters, update where buffer starts
        for (int i = 0; i < position; i++) {
            if (buffer[i] == '\n') {
                bufferStartLine++;
                bufferStartColumn = 0;
            } else {
                bufferStartColumn++;
            }
        }

        // Preserve the consumed characters when a capture buffer is set.
        if (bufferCapture != null) {
            bufferCapture.append(buffer, 0, position);
        }

        // Move the unread tail to the front of the buffer.
        if (limit != position) {
            limit -= position;
            System.arraycopy(buffer, position, buffer, 0, limit);
        } else {
            limit = 0;
        }

        position = 0;
        int total;
        while ((total = reader.read(buffer, limit, buffer.length - limit)) != -1) {
            limit += total;
            if (limit >= minimum) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns an element or attribute name. This is always non-empty for
     * non-relaxed parsers.
1518 */ 1519 private String readName() throws IOException, XmlPullParserException { 1520 if (position >= limit && !fillBuffer(1)) { 1521 checkRelaxed("name expected"); 1522 return ""; 1523 } 1524 1525 int start = position; 1526 StringBuilder result = null; 1527 1528 // read the first character 1529 char c = buffer[position]; 1530 if ((c >= 'a' && c <= 'z') 1531 || (c >= 'A' && c <= 'Z') 1532 || c == '_' 1533 || c == ':' 1534 || c >= '\u00c0' // TODO: check the XML spec 1535 || relaxed) { 1536 position++; 1537 } else { 1538 checkRelaxed("name expected"); 1539 return ""; 1540 } 1541 1542 while (true) { 1543 /* 1544 * Make sure we have at least a single character to read from the 1545 * buffer. This mutates the buffer, so save the partial result 1546 * to the slow path string builder first. 1547 */ 1548 if (position >= limit) { 1549 if (result == null) { 1550 result = new StringBuilder(); 1551 } 1552 result.append(buffer, start, position - start); 1553 if (!fillBuffer(1)) { 1554 return result.toString(); 1555 } 1556 start = position; 1557 } 1558 1559 // read another character 1560 c = buffer[position]; 1561 if ((c >= 'a' && c <= 'z') 1562 || (c >= 'A' && c <= 'Z') 1563 || (c >= '0' && c <= '9') 1564 || c == '_' 1565 || c == '-' 1566 || c == ':' 1567 || c == '.' 1568 || c >= '\u00b7') { // TODO: check the XML spec 1569 position++; 1570 continue; 1571 } 1572 1573 // we encountered a non-name character. done! 1574 if (result == null) { 1575 return stringPool.get(buffer, start, position - start); 1576 } else { 1577 result.append(buffer, start, position - start); 1578 return result.toString(); 1579 } 1580 } 1581 } 1582 1583 private void skip() throws IOException, XmlPullParserException { 1584 while (position < limit || fillBuffer(1)) { 1585 int c = buffer[position]; 1586 if (c > ' ') { 1587 break; 1588 } 1589 position++; 1590 } 1591 } 1592 1593 // public part starts here... 
1594 1595 public void setInput(Reader reader) throws XmlPullParserException { 1596 this.reader = reader; 1597 1598 type = START_DOCUMENT; 1599 name = null; 1600 namespace = null; 1601 degenerated = false; 1602 attributeCount = -1; 1603 encoding = null; 1604 version = null; 1605 standalone = null; 1606 1607 if (reader == null) { 1608 return; 1609 } 1610 1611 position = 0; 1612 limit = 0; 1613 bufferStartLine = 0; 1614 bufferStartColumn = 0; 1615 depth = 0; 1616 documentEntities = null; 1617 } 1618 1619 public void setInput(InputStream is, String charset) throws XmlPullParserException { 1620 position = 0; 1621 limit = 0; 1622 boolean detectCharset = (charset == null); 1623 1624 if (is == null) { 1625 throw new IllegalArgumentException("is == null"); 1626 } 1627 1628 try { 1629 if (detectCharset) { 1630 // read the four bytes looking for an indication of the encoding in use 1631 int firstFourBytes = 0; 1632 while (limit < 4) { 1633 int i = is.read(); 1634 if (i == -1) { 1635 break; 1636 } 1637 firstFourBytes = (firstFourBytes << 8) | i; 1638 buffer[limit++] = (char) i; 1639 } 1640 1641 if (limit == 4) { 1642 switch (firstFourBytes) { 1643 case 0x00000FEFF: // UTF-32BE BOM 1644 charset = "UTF-32BE"; 1645 limit = 0; 1646 break; 1647 1648 case 0x0FFFE0000: // UTF-32LE BOM 1649 charset = "UTF-32LE"; 1650 limit = 0; 1651 break; 1652 1653 case 0x0000003c: // '<' in UTF-32BE 1654 charset = "UTF-32BE"; 1655 buffer[0] = '<'; 1656 limit = 1; 1657 break; 1658 1659 case 0x03c000000: // '<' in UTF-32LE 1660 charset = "UTF-32LE"; 1661 buffer[0] = '<'; 1662 limit = 1; 1663 break; 1664 1665 case 0x0003c003f: // "<?" in UTF-16BE 1666 charset = "UTF-16BE"; 1667 buffer[0] = '<'; 1668 buffer[1] = '?'; 1669 limit = 2; 1670 break; 1671 1672 case 0x03c003f00: // "<?" in UTF-16LE 1673 charset = "UTF-16LE"; 1674 buffer[0] = '<'; 1675 buffer[1] = '?'; 1676 limit = 2; 1677 break; 1678 1679 case 0x03c3f786d: // "<?xm" in ASCII etc. 
1680 while (true) { 1681 int i = is.read(); 1682 if (i == -1) { 1683 break; 1684 } 1685 buffer[limit++] = (char) i; 1686 if (i == '>') { 1687 String s = new String(buffer, 0, limit); 1688 int i0 = s.indexOf("encoding"); 1689 if (i0 != -1) { 1690 while (s.charAt(i0) != '"' && s.charAt(i0) != '\'') { 1691 i0++; 1692 } 1693 char deli = s.charAt(i0++); 1694 int i1 = s.indexOf(deli, i0); 1695 charset = s.substring(i0, i1); 1696 } 1697 break; 1698 } 1699 } 1700 break; 1701 1702 default: 1703 // handle a byte order mark followed by something other than <? 1704 if ((firstFourBytes & 0x0ffff0000) == 0x0feff0000) { 1705 charset = "UTF-16BE"; 1706 buffer[0] = (char) ((buffer[2] << 8) | buffer[3]); 1707 limit = 1; 1708 } else if ((firstFourBytes & 0x0ffff0000) == 0x0fffe0000) { 1709 charset = "UTF-16LE"; 1710 buffer[0] = (char) ((buffer[3] << 8) | buffer[2]); 1711 limit = 1; 1712 } else if ((firstFourBytes & 0x0ffffff00) == 0x0efbbbf00) { 1713 charset = "UTF-8"; 1714 buffer[0] = buffer[3]; 1715 limit = 1; 1716 } 1717 } 1718 } 1719 } 1720 1721 if (charset == null) { 1722 charset = "UTF-8"; 1723 } 1724 1725 int savedLimit = limit; 1726 setInput(new InputStreamReader(is, charset)); 1727 encoding = charset; 1728 limit = savedLimit; 1729 1730 /* 1731 * Skip the optional BOM if we didn't above. This decrements limit 1732 * rather than incrementing position so that <?xml version='1.0'?> 1733 * is still at character 0. 
1734 */ 1735 if (!detectCharset && peekCharacter() == 0xfeff) { 1736 limit--; 1737 System.arraycopy(buffer, 1, buffer, 0, limit); 1738 } 1739 } catch (Exception e) { 1740 throw new XmlPullParserException("Invalid stream or encoding: " + e, this, e); 1741 } 1742 } 1743 1744 public void close() throws IOException { 1745 if (reader != null) { 1746 reader.close(); 1747 } 1748 } 1749 1750 public boolean getFeature(String feature) { 1751 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 1752 return processNsp; 1753 } else if (FEATURE_RELAXED.equals(feature)) { 1754 return relaxed; 1755 } else if (FEATURE_PROCESS_DOCDECL.equals(feature)) { 1756 return processDocDecl; 1757 } else { 1758 return false; 1759 } 1760 } 1761 1762 public String getInputEncoding() { 1763 return encoding; 1764 } 1765 1766 public void defineEntityReplacementText(String entity, String value) 1767 throws XmlPullParserException { 1768 if (processDocDecl) { 1769 throw new IllegalStateException( 1770 "Entity replacement text may not be defined with DOCTYPE processing enabled."); 1771 } 1772 if (reader == null) { 1773 throw new IllegalStateException( 1774 "Entity replacement text must be defined after setInput()"); 1775 } 1776 if (documentEntities == null) { 1777 documentEntities = new HashMap<String, char[]>(); 1778 } 1779 documentEntities.put(entity, value.toCharArray()); 1780 } 1781 1782 public Object getProperty(String property) { 1783 if (property.equals(PROPERTY_XMLDECL_VERSION)) { 1784 return version; 1785 } else if (property.equals(PROPERTY_XMLDECL_STANDALONE)) { 1786 return standalone; 1787 } else if (property.equals(PROPERTY_LOCATION)) { 1788 return location != null ? location : reader.toString(); 1789 } else { 1790 return null; 1791 } 1792 } 1793 1794 /** 1795 * Returns the root element's name if it was declared in the DTD. This 1796 * equals the first tag's name for valid documents. 
1797 */ 1798 public String getRootElementName() { 1799 return rootElementName; 1800 } 1801 1802 /** 1803 * Returns the document's system ID if it was declared. This is typically a 1804 * string like {@code http://www.w3.org/TR/html4/strict.dtd}. 1805 */ 1806 public String getSystemId() { 1807 return systemId; 1808 } 1809 1810 /** 1811 * Returns the document's public ID if it was declared. This is typically a 1812 * string like {@code -//W3C//DTD HTML 4.01//EN}. 1813 */ 1814 public String getPublicId() { 1815 return publicId; 1816 } 1817 1818 public int getNamespaceCount(int depth) { 1819 if (depth > this.depth) { 1820 throw new IndexOutOfBoundsException(); 1821 } 1822 return nspCounts[depth]; 1823 } 1824 1825 public String getNamespacePrefix(int pos) { 1826 return nspStack[pos * 2]; 1827 } 1828 1829 public String getNamespaceUri(int pos) { 1830 return nspStack[(pos * 2) + 1]; 1831 } 1832 1833 public String getNamespace(String prefix) { 1834 if ("xml".equals(prefix)) { 1835 return "http://www.w3.org/XML/1998/namespace"; 1836 } 1837 if ("xmlns".equals(prefix)) { 1838 return "http://www.w3.org/2000/xmlns/"; 1839 } 1840 1841 for (int i = (getNamespaceCount(depth) << 1) - 2; i >= 0; i -= 2) { 1842 if (prefix == null) { 1843 if (nspStack[i] == null) { 1844 return nspStack[i + 1]; 1845 } 1846 } else if (prefix.equals(nspStack[i])) { 1847 return nspStack[i + 1]; 1848 } 1849 } 1850 return null; 1851 } 1852 1853 public int getDepth() { 1854 return depth; 1855 } 1856 1857 public String getPositionDescription() { 1858 StringBuilder buf = new StringBuilder(type < TYPES.length ? 
TYPES[type] : "unknown"); 1859 buf.append(' '); 1860 1861 if (type == START_TAG || type == END_TAG) { 1862 if (degenerated) { 1863 buf.append("(empty) "); 1864 } 1865 buf.append('<'); 1866 if (type == END_TAG) { 1867 buf.append('/'); 1868 } 1869 1870 if (prefix != null) { 1871 buf.append("{" + namespace + "}" + prefix + ":"); 1872 } 1873 buf.append(name); 1874 1875 int cnt = attributeCount * 4; 1876 for (int i = 0; i < cnt; i += 4) { 1877 buf.append(' '); 1878 if (attributes[i + 1] != null) { 1879 buf.append("{" + attributes[i] + "}" + attributes[i + 1] + ":"); 1880 } 1881 buf.append(attributes[i + 2] + "='" + attributes[i + 3] + "'"); 1882 } 1883 1884 buf.append('>'); 1885 } else if (type == IGNORABLE_WHITESPACE) { 1886 ; 1887 } else if (type != TEXT) { 1888 buf.append(getText()); 1889 } else if (isWhitespace) { 1890 buf.append("(whitespace)"); 1891 } else { 1892 String text = getText(); 1893 if (text.length() > 16) { 1894 text = text.substring(0, 16) + "..."; 1895 } 1896 buf.append(text); 1897 } 1898 1899 buf.append("@" + getLineNumber() + ":" + getColumnNumber()); 1900 if (location != null) { 1901 buf.append(" in "); 1902 buf.append(location); 1903 } else if (reader != null) { 1904 buf.append(" in "); 1905 buf.append(reader.toString()); 1906 } 1907 return buf.toString(); 1908 } 1909 1910 public int getLineNumber() { 1911 int result = bufferStartLine; 1912 for (int i = 0; i < position; i++) { 1913 if (buffer[i] == '\n') { 1914 result++; 1915 } 1916 } 1917 return result + 1; // the first line is '1' 1918 } 1919 1920 public int getColumnNumber() { 1921 int result = bufferStartColumn; 1922 for (int i = 0; i < position; i++) { 1923 if (buffer[i] == '\n') { 1924 result = 0; 1925 } else { 1926 result++; 1927 } 1928 } 1929 return result + 1; // the first column is '1' 1930 } 1931 1932 public boolean isWhitespace() throws XmlPullParserException { 1933 if (type != TEXT && type != IGNORABLE_WHITESPACE && type != CDSECT) { 1934 throw new XmlPullParserException(ILLEGAL_TYPE, 
this, null); 1935 } 1936 return isWhitespace; 1937 } 1938 1939 public String getText() { 1940 if (type < TEXT || (type == ENTITY_REF && unresolved)) { 1941 return null; 1942 } else if (text == null) { 1943 return ""; 1944 } else { 1945 return text; 1946 } 1947 } 1948 1949 public char[] getTextCharacters(int[] poslen) { 1950 String text = getText(); 1951 if (text == null) { 1952 poslen[0] = -1; 1953 poslen[1] = -1; 1954 return null; 1955 } 1956 char[] result = text.toCharArray(); 1957 poslen[0] = 0; 1958 poslen[1] = result.length; 1959 return result; 1960 } 1961 1962 public String getNamespace() { 1963 return namespace; 1964 } 1965 1966 public String getName() { 1967 return name; 1968 } 1969 1970 public String getPrefix() { 1971 return prefix; 1972 } 1973 1974 public boolean isEmptyElementTag() throws XmlPullParserException { 1975 if (type != START_TAG) { 1976 throw new XmlPullParserException(ILLEGAL_TYPE, this, null); 1977 } 1978 return degenerated; 1979 } 1980 1981 public int getAttributeCount() { 1982 return attributeCount; 1983 } 1984 1985 public String getAttributeType(int index) { 1986 return "CDATA"; 1987 } 1988 1989 public boolean isAttributeDefault(int index) { 1990 return false; 1991 } 1992 1993 public String getAttributeNamespace(int index) { 1994 if (index >= attributeCount) { 1995 throw new IndexOutOfBoundsException(); 1996 } 1997 return attributes[index * 4]; 1998 } 1999 2000 public String getAttributeName(int index) { 2001 if (index >= attributeCount) { 2002 throw new IndexOutOfBoundsException(); 2003 } 2004 return attributes[(index * 4) + 2]; 2005 } 2006 2007 public String getAttributePrefix(int index) { 2008 if (index >= attributeCount) { 2009 throw new IndexOutOfBoundsException(); 2010 } 2011 return attributes[(index * 4) + 1]; 2012 } 2013 2014 public String getAttributeValue(int index) { 2015 if (index >= attributeCount) { 2016 throw new IndexOutOfBoundsException(); 2017 } 2018 return attributes[(index * 4) + 3]; 2019 } 2020 2021 public String 
getAttributeValue(String namespace, String name) { 2022 for (int i = (attributeCount * 4) - 4; i >= 0; i -= 4) { 2023 if (attributes[i + 2].equals(name) 2024 && (namespace == null || attributes[i].equals(namespace))) { 2025 return attributes[i + 3]; 2026 } 2027 } 2028 2029 return null; 2030 } 2031 2032 public int getEventType() throws XmlPullParserException { 2033 return type; 2034 } 2035 2036 // utility methods to make XML parsing easier ... 2037 2038 public int nextTag() throws XmlPullParserException, IOException { 2039 next(); 2040 if (type == TEXT && isWhitespace) { 2041 next(); 2042 } 2043 2044 if (type != END_TAG && type != START_TAG) { 2045 throw new XmlPullParserException("unexpected type", this, null); 2046 } 2047 2048 return type; 2049 } 2050 2051 public void require(int type, String namespace, String name) 2052 throws XmlPullParserException, IOException { 2053 if (type != this.type 2054 || (namespace != null && !namespace.equals(getNamespace())) 2055 || (name != null && !name.equals(getName()))) { 2056 throw new XmlPullParserException( 2057 "expected: " + TYPES[type] + " {" + namespace + "}" + name, this, null); 2058 } 2059 } 2060 2061 public String nextText() throws XmlPullParserException, IOException { 2062 if (type != START_TAG) { 2063 throw new XmlPullParserException("precondition: START_TAG", this, null); 2064 } 2065 2066 next(); 2067 2068 String result; 2069 if (type == TEXT) { 2070 result = getText(); 2071 next(); 2072 } else { 2073 result = ""; 2074 } 2075 2076 if (type != END_TAG) { 2077 throw new XmlPullParserException("END_TAG expected", this, null); 2078 } 2079 2080 return result; 2081 } 2082 2083 public void setFeature(String feature, boolean value) throws XmlPullParserException { 2084 if (XmlPullParser.FEATURE_PROCESS_NAMESPACES.equals(feature)) { 2085 processNsp = value; 2086 } else if (XmlPullParser.FEATURE_PROCESS_DOCDECL.equals(feature)) { 2087 processDocDecl = value; 2088 } else if (FEATURE_RELAXED.equals(feature)) { 2089 relaxed = 
value; 2090 } else { 2091 throw new XmlPullParserException("unsupported feature: " + feature, this, null); 2092 } 2093 } 2094 2095 public void setProperty(String property, Object value) throws XmlPullParserException { 2096 if (property.equals(PROPERTY_LOCATION)) { 2097 location = String.valueOf(value); 2098 } else { 2099 throw new XmlPullParserException("unsupported property: " + property); 2100 } 2101 } 2102 2103 /** 2104 * A chain of buffers containing XML content. Each content source contains 2105 * the parser's primary read buffer or the characters of entities actively 2106 * being parsed. 2107 * 2108 * <p>For example, note the buffers needed to parse this document: 2109 * <pre> {@code 2110 * <!DOCTYPE foo [ 2111 * <!ENTITY baz "ghi"> 2112 * <!ENTITY bar "def &baz; jkl"> 2113 * ]> 2114 * <foo>abc &bar; mno</foo> 2115 * }</pre> 2116 * 2117 * <p>Things get interesting when the bar entity is encountered. At that 2118 * point two buffers are active: 2119 * <ol> 2120 * <li>The value for the bar entity, containing {@code "def &baz; jkl"} 2121 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2122 * </ol> 2123 * <p>The parser will return the characters {@code "def "} from the bar 2124 * entity's buffer, and then it will encounter the baz entity. To handle 2125 * that, three buffers will be active: 2126 * <ol> 2127 * <li>The value for the baz entity, containing {@code "ghi"} 2128 * <li>The remaining value for the bar entity, containing {@code " jkl"} 2129 * <li>The parser's primary read buffer, containing {@code " mno</foo>"} 2130 * </ol> 2131 * <p>The parser will then return the characters {@code ghi jkl mno} in that 2132 * sequence by reading each buffer in sequence. 
2133 */ 2134 static class ContentSource { 2135 private final ContentSource next; 2136 private final char[] buffer; 2137 private final int position; 2138 private final int limit; 2139 ContentSource(ContentSource next, char[] buffer, int position, int limit) { 2140 this.next = next; 2141 this.buffer = buffer; 2142 this.position = position; 2143 this.limit = limit; 2144 } 2145 } 2146 2147 /** 2148 * Prepends the characters of {@code newBuffer} to be read before the 2149 * current buffer. 2150 */ 2151 private void pushContentSource(char[] newBuffer) { 2152 nextContentSource = new ContentSource(nextContentSource, buffer, position, limit); 2153 buffer = newBuffer; 2154 position = 0; 2155 limit = newBuffer.length; 2156 } 2157 2158 /** 2159 * Replaces the current exhausted buffer with the next buffer in the chain. 2160 */ 2161 private void popContentSource() { 2162 buffer = nextContentSource.buffer; 2163 position = nextContentSource.position; 2164 limit = nextContentSource.limit; 2165 nextContentSource = nextContentSource.next; 2166 } 2167 } 2168